Insmod模块加载过程分析

一．背景

　　a) 在进行JZ2440的一个小demo开发时，用自己编译的内核（3.4.2）加载自己编译的lcd模块，insmod提示因内核版本不匹配而加载失败：提示信息中当前内核的版本为空，模块的内核版本也显示为空。

　　b) 尝试过修改编译的Makefile文件的内核目录，及重新编译内核及模块并重新烧写，均无效。

　　c) 按照网上的方法，使用同一套gcc重新编译文件系统，同样无效；改为编译较新版本的busybox后，insmod命令可以成功使用。

　　d) 开始着手分析insmod加载过程，希望发现真正原因

　　e) 内核模块编译时尝试绕过insmod的版本检查（尚未实验）

二．概述

　　模块是作为ELF对象文件存放在文件系统中的，并通过执行insmod程序链接到内核中。对于每个模块，系统都要分配一个包含以下数据结构的内存区。

　　这些数据包括：一个module对象、表示模块名的一个以null结束的字符串，以及实现模块功能的代码。在2.6内核以前，模块的加载主要通过modutils中的insmod完成，大量工作都在用户空间进行。2.6内核以后，系统使用busybox的insmod命令，大量工作被移到内核代码中处理，无论逻辑上还是代码量上都比原来精简了很多；busybox的insmod命令只负责与内核对接。

三.insmod调用过程分析

　　入口函数在busybox的insmod.c文件中

/*
 * insmod applet entry point.
 *
 * Takes the module file name from the command line, builds the option
 * string from the remaining arguments and hands both to bb_init_module().
 * Returns the value bb_init_module() returned; a non-zero value is also
 * reported through bb_error_msg().
 */
int insmod_main(int argc UNUSED_PARAM, char **argv)
{
	char *module_path;
	char *module_opts;
	int err;

	/*
	 * Compatibility note:
	 * 2.6 style insmod has no options and requires a filename
	 * (not a module name - ".ko" can't be omitted).
	 * 2.4 style insmod can take a module name without ".o"
	 * and searches for the module in default directories
	 * or in $MODPATH.
	 */
	IF_FEATURE_2_4_MODULES(
		getopt32(argv, INSMOD_OPTS INSMOD_ARGS);
		argv += optind - 1;
	);

	/* Advance to the path of the module to load */
	argv++;
	module_path = *argv;
	if (!module_path)
		bb_show_usage();

	/* Everything after the module path becomes the module's option string */
	module_opts = parse_cmdline_module_options(argv, /*quote_spaces:*/ 0);
	err = bb_init_module(module_path, module_opts);
	if (err)
		bb_error_msg("can't insert '%s': %s", module_path, moderror(err));

	return err;
}

　　insmod_main在调用初始化函数bb_init_module之前，先调用parse_cmdline_module_options解析命令行中模块名之后的模块参数，并把它们拼接成一个字符串，作为bb_init_module的第二个参数传入（该函数位于modutils.c）

/*
 * Build the module option string from the command-line arguments that
 * follow argv[0] (argv[0] itself is skipped).
 *
 * Each argument is appended followed by a single space. When quote_spaces
 * is non-zero and an argument has the form var=val with a space inside
 * val, only the value is wrapped in double quotes.
 *
 * Returns a heap-allocated, NUL-terminated string (empty when there are
 * no arguments). The trailing space is intentionally left in place.
 */
char* FAST_FUNC parse_cmdline_module_options(char **argv, int quote_spaces)
{
    char *options;
    int optlen;

    /* Start with an empty string: one zeroed byte holds the terminator */
    options = xzalloc(1);
    optlen = 0;

    /* Walk the module parameters that follow the module name */
    while (*++argv) {
        const char *fmt;
        const char *var;
        const char *val;

        var = *argv;

        /*
         * Grow the buffer for this argument. Besides the argument text,
         * the longest format below adds two quotes, one space and the
         * terminating NUL, hence the 2 + 2 extra bytes.
         */
        options = xrealloc(options, optlen + 2 + strlen(var) + 2);

        fmt = "%.*s%s ";
        /* val points at '=' if present, otherwise at the end of var */
        val = strchrnul(var, '=');

        if (quote_spaces) {
            /*
             * modprobe (module-init-tools version 3.11.1) compat:
             * quote only value:
             * var="val with spaces", not "var=val with spaces"
             * (note: var *name* is not checked for spaces!)
             */
            if (*val) { /* has var=val format. skip '=' */
                val++;
                if (strchr(val, ' '))
                    fmt = "%.*s\"%s\" ";
            }
        }

        /* Emits the first (val - var) bytes of var, then val itself */
        optlen += sprintf(options + optlen, fmt, (int)(val - var), var, val);
    }

    /* Remove trailing space. Disabled */
    /* if (optlen != 0) options[optlen-1] = '\0'; */

    return options;
}

　　初始化函数bb_init_module会通过系统调用，调用内核的sys_init_module(syscalls.h声明，实现在module.c)

/*
 * Load the module image in `filename` into the kernel via init_module(),
 * passing `options` as the module parameter string (NULL is treated as "").
 *
 * Return:
 * 0 on success,
 * -errno on open/read error,
 * errno on init_module() error
 */
int FAST_FUNC bb_init_module(const char *filename, const char *options)
{
    size_t image_size;
    char *image;
    int rc;
    bool mmaped;

    if (!options)
        options = "";

//TODO: audit bb_init_module_24 to match error code convention
#if ENABLE_FEATURE_2_4_MODULES
    /* Kernels older than 2.6.0 use the 2.4-style loader */
    if (get_linux_version_code() < KERNEL_VERSION(2,6,0))
        return bb_init_module_24(filename, options);
#endif

    /* NOTE(review): used as the size limit handed to the readers below - confirm */
    image_size = INT_MAX - 4095;
    mmaped = 0;

    /* Prefer mapping the file; fall back to reading it into heap memory */
    image = try_to_mmap_module(filename, &image_size);
    if (image) {
        mmaped = 1;
    } else {
        errno = ENOMEM; /* may be changed by e.g. open errors below */
        image = xmalloc_open_zipped_read_close(filename, &image_size);
        if (!image)
            return -errno;
    }

    /* Clear errno so that its value after the call reflects init_module() only */
    errno = 0;
    /* Invoke the kernel's init_module system call */
    init_module(image, image_size, options);
    rc = errno;

    /* Release the image the same way it was obtained */
    if (mmaped)
        munmap(image, image_size);
    else
        free(image);

    return rc;
}

　　系统调用在内核中的实现（系统调用的调用过程分析以后补上）：

/*
 * init_module system call: kernel-side entry point reached from user space.
 *
 * @umod:  user-space pointer to the module image
 * @len:   length of the image in bytes
 * @uargs: user-space pointer to the module parameter string
 *
 * Returns the error from may_init_module() or copy_module_from_user()
 * if either fails, otherwise the result of load_module().
 */
SYSCALL_DEFINE3(init_module, void __user *, umod,
        unsigned long, len, const char __user *, uargs)
{
    int err;
    struct load_info info = { };

    /* Bail out first if the caller may not load modules */
    err = may_init_module();
    if (err)
        return err;

    pr_debug("init_module: umod=%p, len=%lu, uargs=%p\n",
           umod, len, uargs);

    /* Bring the module image from user space into `info` */
    err = copy_module_from_user(umod, len, &info);
    if (err)
        return err;

    /* No flags are passed on this path */
    return load_module(&info, uargs, 0);
}

四.内核中的相关结构体

　　以Linux-3.8.2为例，相关结构定义代码在include/linux/module.h中。

　　1.模块依赖关系

/*
 * One dependency relation between two modules.
 * NOTE(review): presumably `source` is the module that uses `target`,
 * and the two list nodes link this record into the lists of the same
 * names in struct module - confirm against kernel/module.c.
 */
struct module_use {
    struct list_head source_list;   /* list node, source side */
    struct list_head target_list;   /* list node, target side */
    struct module *source;
    struct module *target;
};

　　2.模块状态信息

/* Life-cycle states of a module. */
enum module_state {
    /* Normal state. */
    MODULE_STATE_LIVE,
    /* Full formed, running module_init. */
    MODULE_STATE_COMING,
    /* Going away. */
    MODULE_STATE_GOING,
    /* Still setting it up. */
    MODULE_STATE_UNFORMED,
};

　　3.模块计数

/**
 * struct module_ref - per cpu module reference counts
 * @incs: number of module get on this cpu
 * @decs: number of module put on this cpu
 *
 * The structure is aligned to 2 * sizeof(unsigned long), i.e. to the
 * combined size of its two counters.
 */
struct module_ref {
    unsigned long incs;
    unsigned long decs;
} __attribute((aligned(2 * sizeof(unsigned long))));

　　4.module结构（一个长度可怕的结构）

　　module对象描述一个模块。一个双向循环链表存放所有module对象，链表头部存放在modules变量中，而指向相邻单元的指针存放在每个module对象的list字段中。

Struct module

{

    enum module_state state;    //存放模块当前状态

                            //装载期间状态为MODULE_STATE_COMING.

                            //正常运行后，状态变为 MODULE_STATE_LIVE

                            //正在卸载时，状态为 MODULE_STATE_GOING

    /* Member of list of modules */

    struct list_head list;        //模块链表指针，所有加载的模块保存在双向链表中，链表头部为定义的全局变量modules。

    /* Unique handle for this module */

    char name[MODULE_NAME_LEN];        //模块名称

    /* Sysfs stuff. */

    struct module_kobject mkobj;

    struct module_attribute *modinfo_attrs;

    const char *version;

    const char *srcversion;

    struct kobject *holders_dir;

    /* Exported symbols */

/*这三个用于管理模块导出符号，syms是一个数组，有num_syms个数组项，数组项类型为kernel_symbol，负责将标识符（name）分配到内存地址（value）

 struct kernel_symbol 

 { 

     unsigned long value; 

     const char *name; 

 }; 

    //crcs也是一个num_syms个数组项的数组，存储了导出符号的校验和，用于实现版本控制

*/

        const struct kernel_symbol *syms;        //指向导出符号数组的指针

    const unsigned long *crcs;            //指向导出符号CRC值数组的指针

    unsigned int num_syms;                //导出符号数目

    /* Kernel parameters. */

    struct kernel_param *kp;    //内核参数

    unsigned int num_kp;        //内核参数个数

    /* GPL-only exported symbols. */

 /*在导出符号时，内核不仅考虑了可以有所有模块（不考虑许可证类型）使用的符号，还要考虑只能由 GPL 兼容模块使用的符号。 第三类的符号当前仍然可以有任意许可证的模块使用，但在不久的将来也会转变为只适用于 GPL 模块。gpl_syms,num_gpl_syms,gpl_crcs 成员用于只提供给 GPL 模块的符号；gpl_future_syms,num_gpl_future_syms,gpl_future_crcs 用于将来只提供给 GPL 模块的符号。unused_gpl_syms 和 unused_syms 以及对应的计数器和校验和成员描述。 这两个数组用于存储（只适用于 GPL）已经导出， 但 in-tree 模块未使用的符号。在out-of-tree 模块使用此类型符号时，内核将输出一个警告消息。 

*/ 

 unsigned int num_gpl_syms;            //GPL格式导出符号数

 const struct kernel_symbol *gpl_syms;    //指向GPL格式导出符号数组的指针

 const unsigned long *gpl_crcs;        //指向GPL格式导出符号CRC值数组的指针

#ifdef CONFIG_UNUSED_SYMBOLS

    /* unused exported symbols. */

    const struct kernel_symbol *unused_syms;

    const unsigned long *unused_crcs;

    unsigned int num_unused_syms;

    /* GPL-only, unused exported symbols. */

    unsigned int num_unused_gpl_syms;

    const struct kernel_symbol *unused_gpl_syms;

    const unsigned long *unused_gpl_crcs;

#endif

#ifdef CONFIG_MODULE_SIG

    /* Signature was verified. */

    bool sig_ok;

#endif

    /* symbols that will be GPL-only in the near future. */

    const struct kernel_symbol *gpl_future_syms;

    const unsigned long *gpl_future_crcs;

    unsigned int num_gpl_future_syms;

/* Exception table */

 /*如果模块定义了新的异常，异常的描述保存在 extable数组中。 num_exentries 指定了数组的长度。 */

    unsigned int num_exentries;

    struct exception_table_entry *extable;

  /*模块的二进制数据分为两个部分；初始化部分和核心部分。 

 前者包含的数据在转载结束后都可以丢弃（例如：初始化函数），后者包含了正常运行期间需要的所有数据。   

 初始化部分的起始地址保存在 module_init,长度为 init_size 字节； 

 核心部分有 module_core 和 core_size 描述。 

 */

    /* Startup function. */

    int (*init)(void);

    /* If this is non-NULL, vfree after init() returns */

    void *module_init; //用于模块初始化的动态内存区指针

    /* Here is the actual code + data, vfree'd on unload. */

    void *module_core; //用于模块核心函数与数据结构的动态内存区指针

    /* Here are the sizes of the init and core sections */

    unsigned int init_size, core_size;  //用于模块初始化的动态内存区大小和用于模块核心函数与数据结构的动态内存区指针

    /* The size of the executable code in each section.  */

 //模块初始化的可执行代码大小，模块核心可执行代码大小，只当模块链接时使用

    unsigned int init_text_size, core_text_size;

    /* Size of RO sections of the module (text+rodata) */

    unsigned int init_ro_size, core_ro_size;

    /* Arch-specific module values */

    struct mod_arch_specific arch; //依赖于体系结构的字段

 /*如果模块会污染内核，则设置 taints.污染意味着内核怀疑该模块做了一个有害的事情，可能妨碍内核的正常运作。 

 如果发生内核恐慌（在发生致命的内部错误，无法恢复正常运作时，将触发内核恐慌），那么错误诊断也会包含为什么内核被污染的有关信息。 

 这有助于开发者区分来自正常运行系统的错误报告和包含某些可疑因素的系统错误。 

 add_taint_module 函数用来设置 struct module 的给定实例的 taints 成员。  

模块可能因两个原因污染内核： 

 1，如果模块的许可证是专有的，或不兼容 GPL，那么在模块载入内核时，会使用 TAINT_PROPRIETARY_MODULE. 

   由于专有模块的源码可能弄不到，模块在内核中作的任何事情都无法跟踪，因此，bug 很可能是由模块引入的。 

  

   内核提供了函数 license_is_gpl_compatible 来判断给定的许可证是否与 GPL 兼容。 

 2，TAINT_FORCED_MODULE 表示该模块是强制装载的。如果模块中没有提供版本信息，也称为版本魔术（version magic）， 

   或模块和内核某些符号的版本不一致，那么可以请求强制装载。  

 */

    unsigned int taints;    /* same bits as kernel:tainted */

#ifdef CONFIG_GENERIC_BUG

    /* Support for BUG */

    unsigned num_bugs;

    struct list_head bug_list;

    struct bug_entry *bug_table;

#endif

#ifdef CONFIG_KALLSYMS

    /*

     * We keep the symbol and string tables for kallsyms.

     * The core_* fields below are temporary, loader-only (they

     * could really be discarded after module init).

     */

    Elf_Sym *symtab, *core_symtab;

    unsigned int num_symtab, core_num_syms;

    char *strtab, *core_strtab;

    /* Section attributes */

    struct module_sect_attrs *sect_attrs;

    /* Notes attributes */

    struct module_notes_attrs *notes_attrs;

#endif

    /* The command line arguments (may be mangled).  People like

       keeping pointers to this stuff */

    char *args;

#ifdef CONFIG_SMP

    /* Per-cpu data. */

    void __percpu *percpu;

    unsigned int percpu_size;

#endif

#ifdef CONFIG_TRACEPOINTS

    unsigned int num_tracepoints;

    struct tracepoint * const *tracepoints_ptrs;

#endif

#ifdef HAVE_JUMP_LABEL

    struct jump_entry *jump_entries;

    unsigned int num_jump_entries;

#endif

#ifdef CONFIG_TRACING

    unsigned int num_trace_bprintk_fmt;

    const char **trace_bprintk_fmt_start;

#endif

#ifdef CONFIG_EVENT_TRACING

    struct ftrace_event_call **trace_events;

    unsigned int num_trace_events;

#endif

#ifdef CONFIG_FTRACE_MCOUNT_RECORD

    unsigned int num_ftrace_callsites;

    unsigned long *ftrace_callsites;

#endif

#ifdef CONFIG_MODULE_UNLOAD

    /* What modules depend on me? */

    struct list_head source_list;

    /* What modules do I depend on? */

    struct list_head target_list;

    /* Who is waiting for us to be unloaded */

    struct task_struct *waiter; //正卸载模块的进程

    /* Destruction function. */

    void (*exit)(void);

 /*module_ref 用于引用计数。系统中的每个 CPU，都对应到该数组中的数组项。该项指定了系统中有多少地方使用了该模块。 

内核提供了 try_module_get 和 module_put 函数，用对引用计数器加1或减1，如果调用者确信相关模块当前没有被卸载， 

也可以使用 __module_get 对引用计数加 1.相反，try_module_get 会确认模块确实已经加载。

 struct module_ref { 

   unsigned int incs; 

   unsigned int decs; 

  } 

*/ 

    struct module_ref __percpu *refptr; //模块计数器，每个cpu一个

#endif

#ifdef CONFIG_CONSTRUCTORS

    /* Constructor functions. */

    ctor_fn_t *ctors;

    unsigned int num_ctors;

#endif

}

五.模块链接过程

　　用户可以通过执行insmod外部程序把一个模块链接到正在运行的内核中。该过程执行以下操作：
　　1.从命令行中读取要链接的模块名
　　2.确定模块对象代码所在的文件在系统目录树中的位置。
　　3.从磁盘读入存有模块目标代码的文件。
　　4.调用init_module()系统调用。函数将模块二进制文件复制到内核，然后由内核完成剩余的任务。
　　5.init_module函数通过系统调用层，进入内核到达内核函数 sys_init_module，这是加载模块的主要函数。
　　6.结束。