可执行文件加载执行过程:

上一节我们说到ELF文件格式,静态库的符号解析和重定位的内容。这一节我们来分析一下可执行文件。

由上一节我们知道可执行文件也是ELF文件,当程序被加载器加载到内存时是按照ELF格式去解析,然后把可执行文件的不同节加载到虚拟地址空间中。我们看一下32位下的进程虚拟地址模型:

可执行文件的信息被加载到了虚拟地址空间。根据ELF信息找到程序的入口地址就可以执行进程了。我们看一下linux 3.2 内核代码 fs/exec.c中调用可执行文件的大致过程。

static int do_execve_common(const char *filename,
struct user_arg_ptr argv,
struct user_arg_ptr envp,
struct pt_regs *regs)
{
struct linux_binprm *bprm;
struct file *file;
struct files_struct *displaced;
bool clear_in_exec;
int retval;
const struct cred *cred = current_cred();
...
...
retval = search_binary_handler(bprm,regs); //搜索可执行文件处理接口
if (retval < )
goto out;
} int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
{
unsigned int depth = bprm->recursion_depth;
int try,retval;
struct linux_binfmt *fmt;
pid_t old_pid; .......
retval = -ENOENT;
for (try=; try<; try++) {
read_lock(&binfmt_lock);
list_for_each_entry(fmt, &formats, lh) {
int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
//找到可执行文件加载入口
if (!fn)
continue;
}
}
} typedef struct elf64_hdr {
unsigned char e_ident[EI_NIDENT]; /* ELF "magic number" */
Elf64_Half e_type;
Elf64_Half e_machine;
Elf64_Word e_version;
Elf64_Addr e_entry; /* Entry point virtual address */
Elf64_Off e_phoff; /* Program header table file offset */
Elf64_Off e_shoff; /* Section header table file offset */
Elf64_Word e_flags;
Elf64_Half e_ehsize;
Elf64_Half e_phentsize;
Elf64_Half e_phnum;
Elf64_Half e_shentsize;
Elf64_Half e_shnum;
Elf64_Half e_shstrndx;
} Elf64_Ehdr; load_binary 可执行目标文件对应 binfmt_elf.c 文件
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
//解析elf文件 做段映射
struct {
struct elfhdr elf_ex;
struct elfhdr interp_elf_ex;
} *loc; loc = kmalloc(sizeof(*loc), GFP_KERNEL);
if (!loc) {
retval = -ENOMEM;
goto out_ret;
} /* Get the exec-header */
loc->elf_ex = *((struct elfhdr *)bprm->buf); retval = -ENOEXEC;
/* First of all, some simple consistency checks */
if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != )
goto out; if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
goto out;
if (!elf_check_arch(&loc->elf_ex))
goto out;
if (!bprm->file->f_op || !bprm->file->f_op->mmap)
goto out; /* Now read in all of the header information */
if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
goto out;
if (loc->elf_ex.e_phnum < ||
loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
goto out;
size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
retval = -ENOMEM;
elf_phdata = kmalloc(size, GFP_KERNEL);
if (!elf_phdata)
goto out; retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
(char *)elf_phdata, size);
if (retval != size) {
if (retval >= )
retval = -EIO;
goto out_free_ph;
} elf_ppnt = elf_phdata;
elf_bss = ;
elf_brk = ; start_code = ~0UL;
end_code = ;
start_data = ;
end_data = ; for (i = ; i < loc->elf_ex.e_phnum; i++) {
if (elf_ppnt->p_type == PT_INTERP) {
/* This is the program interpreter used for
* shared libraries - for now assume that this
* is an a.out format binary
*/
retval = -ENOEXEC;
if (elf_ppnt->p_filesz > PATH_MAX ||
elf_ppnt->p_filesz < )
goto out_free_ph; retval = -ENOMEM;
elf_interpreter = kmalloc(elf_ppnt->p_filesz,
GFP_KERNEL);
if (!elf_interpreter)
goto out_free_ph; retval = kernel_read(bprm->file, elf_ppnt->p_offset,
elf_interpreter,
elf_ppnt->p_filesz);
if (retval != elf_ppnt->p_filesz) {
if (retval >= )
retval = -EIO;
goto out_free_interp;
}
/* make sure path is NULL terminated */
retval = -ENOEXEC;
if (elf_interpreter[elf_ppnt->p_filesz - ] != '\0')
goto out_free_interp; interpreter = open_exec(elf_interpreter);
retval = PTR_ERR(interpreter);
if (IS_ERR(interpreter))
goto out_free_interp; /*
* If the binary is not readable then enforce
* mm->dumpable = 0 regardless of the interpreter's
* permissions.
*/
would_dump(bprm, interpreter); /* Get the exec headers */
retval = kernel_read(interpreter, ,
(void *)&loc->interp_elf_ex,
sizeof(loc->interp_elf_ex));
if (retval != sizeof(loc->interp_elf_ex)) {
if (retval >= )
retval = -EIO;
goto out_free_dentry;
} break;
}
elf_ppnt++;
} elf_ppnt = elf_phdata;
for (i = ; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
if (elf_ppnt->p_type == PT_GNU_STACK) {
if (elf_ppnt->p_flags & PF_X)
executable_stack = EXSTACK_ENABLE_X;
else
executable_stack = EXSTACK_DISABLE_X;
break;
} /* Some simple consistency checks for the interpreter */
if (elf_interpreter) {
retval = -ELIBBAD;
/* Not an ELF interpreter */
if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != )
goto out_free_dentry;
/* Verify the interpreter has a valid arch */
if (!elf_check_arch(&loc->interp_elf_ex))
goto out_free_dentry;
} /* Flush all traces of the currently running executable */
retval = flush_old_exec(bprm);
if (retval)
goto out_free_dentry; /* OK, This is the point of no return */
current->flags &= ~PF_FORKNOEXEC;
current->mm->def_flags = def_flags; /* Do this immediately, since STACK_TOP as used in setup_arg_pages
may depend on the personality. */
SET_PERSONALITY(loc->elf_ex);
if (elf_read_implies_exec(loc->elf_ex, executable_stack))
current->personality |= READ_IMPLIES_EXEC; if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
current->flags |= PF_RANDOMIZE; setup_new_exec(bprm); /* Do this so that we can load the interpreter, if need be. We will
change some of these later */
current->mm->free_area_cache = current->mm->mmap_base;
current->mm->cached_hole_size = ;
retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
executable_stack);
if (retval < ) {
send_sig(SIGKILL, current, );
goto out_free_dentry;
} current->mm->start_stack = bprm->p; /* Now we do a little grungy work by mmapping the ELF image into
the correct location in memory. */
for(i = , elf_ppnt = elf_phdata;
i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
int elf_prot = , elf_flags;
unsigned long k, vaddr;
unsigned long total_size = ; if (elf_ppnt->p_type != PT_LOAD)
continue; if (unlikely (elf_brk > elf_bss)) {
unsigned long nbyte; /* There was a PT_LOAD segment with p_memsz > p_filesz
before this one. Map anonymous pages, if needed,
and clear the area. */
retval = set_brk(elf_bss + load_bias,
elf_brk + load_bias);
if (retval) {
send_sig(SIGKILL, current, );
goto out_free_dentry;
}
nbyte = ELF_PAGEOFFSET(elf_bss);
if (nbyte) {
nbyte = ELF_MIN_ALIGN - nbyte;
if (nbyte > elf_brk - elf_bss)
nbyte = elf_brk - elf_bss;
if (clear_user((void __user *)elf_bss +
load_bias, nbyte)) {
/*
* This bss-zeroing can fail if the ELF
* file specifies odd protections. So
* we don't check the return value
*/
}
}
} if (elf_ppnt->p_flags & PF_R)
elf_prot |= PROT_READ;
if (elf_ppnt->p_flags & PF_W)
elf_prot |= PROT_WRITE;
if (elf_ppnt->p_flags & PF_X)
elf_prot |= PROT_EXEC; elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE; vaddr = elf_ppnt->p_vaddr;
if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
elf_flags |= MAP_FIXED;
} else if (loc->elf_ex.e_type == ET_DYN) {
/* Try and get dynamic programs out of the way of the
* default mmap base, as well as whatever program they
* might try to exec. This is because the brk will
* follow the loader, and is not movable. */
#if defined(CONFIG_X86) || defined(CONFIG_ARM)
/* Memory randomization might have been switched off
* in runtime via sysctl.
* If that is the case, retain the original non-zero
* load_bias value in order to establish proper
* non-randomized mappings.
*/
if (current->flags & PF_RANDOMIZE)
load_bias = ;
else
load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
total_size = total_mapping_size(elf_phdata,
loc->elf_ex.e_phnum);
if (!total_size) {
retval = -EINVAL;
goto out_free_dentry;
}
} error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
send_sig(SIGKILL, current, );
retval = IS_ERR((void *)error) ?
PTR_ERR((void*)error) : -EINVAL;
goto out_free_dentry;
} if (!load_addr_set) {
load_addr_set = ;
load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
if (loc->elf_ex.e_type == ET_DYN) {
load_bias += error -
ELF_PAGESTART(load_bias + vaddr);
load_addr += load_bias;
reloc_func_desc = load_bias;
}
}
k = elf_ppnt->p_vaddr;
if (k < start_code)
start_code = k;
if (start_data < k)
start_data = k; /*
* Check to see if the section's size will overflow the
* allowed task size. Note that p_filesz must always be
* <= p_memsz so it is only necessary to check p_memsz.
*/
if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
elf_ppnt->p_memsz > TASK_SIZE ||
TASK_SIZE - elf_ppnt->p_memsz < k) {
/* set_brk can never work. Avoid overflows. */
send_sig(SIGKILL, current, );
retval = -EINVAL;
goto out_free_dentry;
} k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz; if (k > elf_bss)
elf_bss = k;
if ((elf_ppnt->p_flags & PF_X) && end_code < k)
end_code = k;
if (end_data < k)
end_data = k;
k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
if (k > elf_brk)
elf_brk = k;
} loc->elf_ex.e_entry += load_bias;
elf_bss += load_bias;
elf_brk += load_bias;
start_code += load_bias;
end_code += load_bias;
start_data += load_bias;
end_data += load_bias; /* Calling set_brk effectively mmaps the pages that we need
* for the bss and break sections. We must do this before
* mapping in the interpreter, to make sure it doesn't wind
* up getting placed where the bss needs to go.
*/
retval = set_brk(elf_bss, elf_brk);
if (retval) {
send_sig(SIGKILL, current, );
goto out_free_dentry;
}
if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
send_sig(SIGSEGV, current, );
retval = -EFAULT; /* Nobody gets to see this, but.. */
goto out_free_dentry;
} if (elf_interpreter) {
unsigned long uninitialized_var(interp_map_addr); elf_entry = load_elf_interp(&loc->interp_elf_ex,
interpreter,
&interp_map_addr,
load_bias);
if (!IS_ERR((void *)elf_entry)) {
/*
* load_elf_interp() returns relocation
* adjustment
*/
interp_load_addr = elf_entry;
elf_entry += loc->interp_elf_ex.e_entry;
}
if (BAD_ADDR(elf_entry)) {
force_sig(SIGSEGV, current);
retval = IS_ERR((void *)elf_entry) ?
(int)elf_entry : -EINVAL;
goto out_free_dentry;
}
reloc_func_desc = interp_load_addr; allow_write_access(interpreter);
fput(interpreter);
kfree(elf_interpreter);
} else {
elf_entry = loc->elf_ex.e_entry;
if (BAD_ADDR(elf_entry)) {
force_sig(SIGSEGV, current);
retval = -EINVAL;
goto out_free_dentry;
}
} kfree(elf_phdata); set_binfmt(&elf_format); #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
if (retval < ) {
send_sig(SIGKILL, current, );
goto out;
}
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ install_exec_creds(bprm);
current->flags &= ~PF_FORKNOEXEC;
retval = create_elf_tables(bprm, &loc->elf_ex,
load_addr, interp_load_addr);
if (retval < ) {
send_sig(SIGKILL, current, );
goto out;
}
/* N.B. passed_fileno might not be initialized? */
current->mm->end_code = end_code;
current->mm->start_code = start_code;
current->mm->start_data = start_data;
current->mm->end_data = end_data;
current->mm->start_stack = bprm->p; #ifdef arch_randomize_brk
if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > )) {
current->mm->brk = current->mm->start_brk =
arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
current->brk_randomized = ;
#endif
}
#endif if (current->personality & MMAP_PAGE_ZERO) {
/* Why this, you ask??? Well SVr4 maps page 0 as read-only,
and some applications "depend" upon this behavior.
Since we do not have the power to recompile these, we
emulate the SVr4 behavior. Sigh. */
down_write(&current->mm->mmap_sem);
error = do_mmap(NULL, , PAGE_SIZE, PROT_READ | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE, );
up_write(&current->mm->mmap_sem);
}
//开始执行新进程
start_thread(regs, elf_entry, bprm->p);
retval = ;
out:
kfree(loc);
out_ret:
return retval; /* error cleanup */
out_free_dentry:
allow_write_access(interpreter);
if (interpreter)
fput(interpreter);
out_free_interp:
kfree(elf_interpreter);
out_free_ph:
kfree(elf_phdata);
goto out;
}

我们仔细看一下这里

        elf_phdata = kmalloc(size, GFP_KERNEL);
if (!elf_phdata)
goto out;
//读取可执行文件的程序头
retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
(char *)elf_phdata, size);
if (retval != size) {
if (retval >= )
retval = -EIO;
goto out_free_ph;
}

readelf 看一下可执行文件的程序头(x86_64)

程序头描述了可执行文件到虚拟地址的映射关系。 这里还可以看到INTERP段 请求解释器/lib64/ld-linux-x86-64.so.2.  它被用来加载和定位动态库, 这里的可执行文件对标准c库是动态链接, 所以会请求这个文件。

for (i = ; i < loc->elf_ex.e_phnum; i++) {
//检查是否有需要加载的解释器
if (elf_ppnt->p_type == PT_INTERP) {
/* This is the program interpreter used for
* shared libraries - for now assume that this
* is an a.out format binary
*/
retval = -ENOEXEC;
if (elf_ppnt->p_filesz > PATH_MAX ||
elf_ppnt->p_filesz < )
goto out_free_ph; retval = -ENOMEM;
elf_interpreter = kmalloc(elf_ppnt->p_filesz,
GFP_KERNEL);
if (!elf_interpreter)
goto out_free_ph;
//根据其位置和大小把整个"解释器"段的内容读入缓冲区
retval = kernel_read(bprm->file, elf_ppnt->p_offset,
elf_interpreter,
elf_ppnt->p_filesz);
if (retval != elf_ppnt->p_filesz) {
if (retval >= )
retval = -EIO;
goto out_free_interp;
}
/* make sure path is NULL terminated */
retval = -ENOEXEC;
if (elf_interpreter[elf_ppnt->p_filesz - ] != '\0')
goto out_free_interp;
//打开解释器
interpreter = open_exec(elf_interpreter);
retval = PTR_ERR(interpreter);
if (IS_ERR(interpreter))
goto out_free_interp; /*
* If the binary is not readable then enforce
* mm->dumpable = 0 regardless of the interpreter's
* permissions.
*/
would_dump(bprm, interpreter); /* Get the exec headers */
//得到解释器头部
retval = kernel_read(interpreter, ,
(void *)&loc->interp_elf_ex,
sizeof(loc->interp_elf_ex));
if (retval != sizeof(loc->interp_elf_ex)) {
if (retval >= )
retval = -EIO;
goto out_free_dentry;
} break;
}
elf_ppnt++;
}
   //PT_GNU_STACK 判断栈中是否有可执行代码
  ... .... .... ..

然后结下来的工作就是检查和确定装入地址开始装入了

/* Now we do a little grungy work by mmapping the ELF image into
the correct location in memory. */
for(i = , elf_ppnt = elf_phdata;
i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
int elf_prot = , elf_flags;
unsigned long k, vaddr;
unsigned long total_size = ; if (elf_ppnt->p_type != PT_LOAD)
continue; if (unlikely (elf_brk > elf_bss)) {
unsigned long nbyte;
。。。。。。。。
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
send_sig(SIGKILL, current, );
retval = IS_ERR((void *)error) ?
PTR_ERR((void*)error) : -EINVAL;
goto out_free_dentry;
}
//从elf中读取程序入口地址 开始执行
start_thread(regs, elf_entry, bprm->p);
retval = ;
}

执行的过程:

当加载器执行时, 就会创建上图所示虚拟存储器镜像(图为32位系统)。在程序段头表的指导下将可执行文件的代码段和数据段映射到虚拟地址的虚拟地址空间,接下来加载器跳转到_start 入口处依次执行  : __libc_init_first  ------>  _init  -------> atexit -------> main ------>_exit   首先初始化c库。调用初始化代码,注册退出函数  执行main 函数 最后执行退出函数。关于这块可以参考: http://blog.chinaunix.net/uid-24774106-id-3450789.html

到这里可执行文件执行过程已经简单了解了,整个过程与进程的创建执行有关,加载分析ELF并不复杂,其中还牵涉到动态库的链接和使用后续会单独看一下动态库加载过程。

Linux 链接详解(2)的更多相关文章

  1. Linux 链接详解----静态链接实例分析

    由Linux链接详解(1)中我们简单的分析了静态库的引用解析和重定位的内容, 下面我们结合实例来看一下静态链接重定位过程. /* * a.c */ ; void add(int c); int mai ...

  2. Linux 链接详解----动态链接库

    静态库的缺点: 库函数被包含在每一个运行的进程中,会造成主存的浪费. 目标文件的size过大 每次更新一个模块都需要重新编译,更新困难,使用不方便. 动态库: 是一个目标文件,包含代码和数据,它可以在 ...

  3. Linux 链接详解(1)

    可执行文件的生成过程: hello.c ----预处理--->  hello.i ----编译----> hello.s -----汇编-----> hello.o -----链接- ...

  4. Linux命令详解之—pwd命令

    Linux的pwd命令也是一个非常常用的命令,本文为大家介绍下Linux中pwd命令的用法. 更多Linux命令详情请看:Linux命令速查手册 Linux pwd命令用于显示工作目录. 执行pwd指 ...

  5. Linux 系统结构详解

    Linux 系统结构详解 Linux系统一般有4个主要部分: 内核.shell.文件系统和应用程序.内核.shell和文件系统一起形成了基本的操作系统结构,它们使得用户可以运行程序.管理文件并使用系统 ...

  6. Linux权限详解 命令之 chmod:修改权限

    权限简介 Linux系统上对文件的权限有着严格的控制,用于如果相对某个文件执行某种操作,必须具有对应的权限方可执行成功. Linux下文件的权限类型一般包括读,写,执行.对应字母为 r.w.x. Li ...

  7. Linux 目录详解 树状目录结构图

    1.树状目录结构图 2./目录 目录 描述 / 第一层次结构的根.整个文件系统层次结构的根目录. /bin/ 需要在单用户模式可用的必要命令(可执行文件):面向所有用户,例如:cat.ls.cp,和/ ...

  8. [转帖]Linux文件系统详解

    Linux文件系统详解 https://www.cnblogs.com/alantu2018/p/8461749.html 贼复杂.. 从操作系统的角度详解Linux文件系统层次.文件系统分类.文件系 ...

  9. Linux命令详解之—tail命令

    tail命令也是一个非常常用的文件查看类的命令,今天就为大家介绍下Linux tail命令的用法. 更多Linux命令详情请看:Linux命令速查手册 Linux tail命令主要用来从指定点开始将文 ...

随机推荐

  1. 读书笔记之《深入理解Java虚拟机》不完全学习总结

    写在前面: 之所以称作不完全总结,因为我其实没有完完全全地看完此书,但是涵盖了大部分重要章节:同时以下总结是我自己认为很重要知识,细枝末节处难免遗漏,还请详细参考原著. 转载请注明原文出处:http: ...

  2. MongoDB备份恢复与导出导入

    说明:本文所有操作均在win7下的MongoDB3.4.4版本中进行. 一.备份与恢复 1. 备份: mongodump -h IP --port 端口 -u 用户名 -p 密码 -d数据库 -o 文 ...

  3. LeetCode 105. Construct Binary Tree from Preorder and Inorder Traversal (用先序和中序树遍历来建立二叉树)

    Given preorder and inorder traversal of a tree, construct the binary tree. Note:You may assume that ...

  4. H - Pair: normal and paranormal URAL - 2019

    If you find yourself in Nevada at an abandoned nuclear range during Halloween time, you’ll become a  ...

  5. Can you solve this equation?

    Problem Description Now,given the equation 8*x^4 + 7*x^3 + 2*x^2 + 3*x + 6 == Y,can you find its sol ...

  6. 交换知识 VLAN VTP STP 单臂路由

    第1章 交换基础 1.1 园区网分层结构 层次 作用 出口层 广域网接入 出口策略 带宽控制 核心层 高速转发 服务器接入 路由选择 汇聚层 流量汇聚 链路冗余 设备冗余 路由选择 接入层 用户接入 ...

  7. struts2运行过程(图解)

    .................................................................................................... ...

  8. [转]移动前端开发之viewport的深入理解

    今天去面试,被问到一个用了一万次的东西,然而我并不了解具体是个毛毛,看这一篇豁然开朗. DevicePixelRatio 以及这句话:移动设备上的viewport分为layout viewport  ...

  9. Linux系列教程(十八)——Linux文件系统管理之文件系统常用命令

    通过前面两篇博客,我们介绍了Linux系统的权限管理.Linux权限管理之ACL权限 介绍了通过设定 ACL 权限,我们为某个用户指定某个文件的特定权限,这在Linux只能对于一个文件只能有所有者权限 ...

  10. #UnityTips# 2017.11.14

    hi,all.最近比较忙,所以更新也比较慢了. 今天就来和大家分享一个小Tip,它是关于UGUI的坑的. 使用过UGUI的朋友们都知道,Canvas的渲染方式有三种: Screen Space Ove ...