Android Hook框架adbi的分析(1)---注入工具hijack

本文博客地址：http://blog.csdn.net/qq1084283172/article/details/74055505

一、Android Hook框架adbi的基本介绍

adbi是Android平台的inline Hook框架，和前面博客中提到的libinject和poison注入工具原理差不多，要说具体的相似性的话，poison注入工具可以替换此处adbi的hijack注入工具实现so的注入和函数的Hook。adbi Hook框架和前面poison注入工具实现函数Hook的原理是一样的，先通过进程注入工具将so库文件注入到指定的目标pid进程中，函数Hook的执行在so库文件注入到目标pid中即so库文件被目标pid进程加载的时候执行的，具体的原理可以参考前面的博客《Android so注入(inject)和Hook(挂钩)的实现思路讨论》,在so库文件加载的时候，会首先执行.init段的构造函数，该构造函数的定义方法为:

void __attribute__((constructor)) x_init(void)

当我们向目标pid进程注入so库文件时，会最先执行该x_init函数，因此可以实现Hook目标pid进程函数的目的，该x_init函数唯一的不足就是不能传递函数参数。

Android Hook框架adbi的源码地址：https://github.com/crmulliner/adbi 。

Android Hook框架adbi的源码结构和功能示意图如下：

二、hijack注入工具的工作步骤

默认的约定

.目标pid进程：远程被注入so库文件的目标pid进程

.本地进程：当前so注入工具hijack所在的进程

.Android系统跨进程so库文件注入能够实现的前提：在获取到root权限的条件下

1. 获取目标pid进程的libc库中mprotect函数的远程调用地址；
2. 获取被注入的目标pid进程中dlopen函数的远程调用地址；
3. ptrace附加到被注入的目标pid进程中，等待附加目标pid进程成功完成；
4. 获取目标pid进程被ptrace时寄存器的状态值并保存，用于后面目标pid进程环境的恢复还原；
5. 通过修改目标pid进程被ptrace时的栈指针寄存器SP，实现在目标pid进程中申请内存空间；
6. 将需要注入的so库文件的路径和实现在目标pid进程中调用dlopen函数加载so库文件的shellcode写入到步骤5中申请的栈内存空间中；
7. 修改获取到的目标pid进程被ptrace时R0、R1、R2寄存器的值为mprotect函数被调用时的参数值，设置程序指令计数寄存器PC的值为步骤1中获取到的目标pid进程的mprotect函数的远程调用地址；
8. 修改获取到的目标pid进程被ptrace时LR寄存器的值为步骤6中提到的、实现在目标pid进程中调用dlopen函数加载so库文件的shellcode地址；
9. 设置目标pid进程被ptrace时寄存器的环境状态值，即让上面步骤7、步骤8中的修改生效；
10. 再次调用ptrace函数释放对目标pid进程的附加，目标pid进程得以继续执行(但是寄存器环境已经改变)，因此基于上面的操作实现了在目标pid进程中调用mprotect函数修改存放shellcode的堆栈为可读可写可执行，并在mprotect函数返回时执行调用目标pid进程的dlopen函数加载so库文件的shellcode代码，从而实现了跨进程so库文件的注入。

三、注入工具hijack代码的详细分析

（1）.获取目标pid进程libc库中mprotect函数的远程调用地址

1.获取指定目标pid进程内存布局的内存地址信息和名称，相同名称的内存布局空间进行合并。

/*
 * Read /proc/<pid>/maps and fill MM with one entry per distinct mapping name.
 *
 * Mappings that share a name are merged into a single entry spanning the
 * lowest start and the highest end seen; every unnamed mapping gets its own
 * entry called MEMORY_ONLY.  The number of entries written is stored in *NMMP.
 *
 * Returns 0 on success and -1 if the maps file cannot be opened or read, or
 * does not fit into the local buffer.
 *
 * NOTE(review): the function has no way to know the capacity of MM; the
 * caller must pass an array large enough for every distinct mapping.
 */
static int load_memmap(pid_t pid, struct mm *mm, int *nmmp)
{
    char raw[80000]; // increase this if needed for larger "maps"
    char name[MAX_NAME_LEN];
    char fmt[64];
    char *p;
    unsigned long start, end;
    struct mm *m;
    int nmm = 0;
    int fd, rv;
    int i;

    /* raw doubles as scratch space for the "/proc/<pid>/maps" path */
    sprintf(raw, "/proc/%d/maps", pid);
    fd = open(raw, O_RDONLY);
    if (0 > fd)
        return -1;

    /* Zero the buffer so that the data read below is NUL-terminated */
    memset(raw, 0, sizeof(raw));

    /* read() may return short counts, so keep reading until EOF */
    p = raw;
    while (1) {
        rv = read(fd, p, sizeof(raw)-(p-raw));
        if (0 > rv) {
            close(fd);      /* do not leak the descriptor on error */
            return -1;
        }
        if (0 == rv)
            break;
        p += rv;
        /* a completely full buffer has no terminator left: give up */
        if ((size_t)(p-raw) >= sizeof(raw)) {
            close(fd);
            return -1;
        }
    }
    close(fd);

    /*
     * Line format:
     *   400c2000-400da000 r-xp 00000000 b3:19 949   /system/lib/libm.so
     * The name conversion is bounded to MAX_NAME_LEN-1 characters so that an
     * overlong path cannot overflow `name`, and the addresses are parsed
     * without a width limit so that they are not cut off after 8 digits.
     */
    snprintf(fmt, sizeof(fmt), "%%lx-%%lx %%*s %%*s %%*s %%*s %%%ds",
             MAX_NAME_LEN - 1);

    for (p = strtok(raw, "\n"); p; p = strtok(NULL, "\n")) {
        /* sscanf returns the number of fields converted and assigned */
        rv = sscanf(p, fmt, &start, &end, name);

        /* malformed line: start/end were not both assigned, skip it */
        if (rv < 2)
            continue;

        /* only the two addresses matched: an unnamed mapping */
        if (rv == 2) {
            m = &mm[nmm++];
            m->start = start;
            m->end = end;
            strcpy(m->name, MEMORY_ONLY);
            continue;
        }

        /* search backward for an earlier entry with the same name, e.g.
         *   7739a000-7739c000 r-xp ... /system/lib/libOpenSLES.so
         *   7739c000-7739d000 r--p ... /system/lib/libOpenSLES.so
         */
        for (i = nmm-1; i >= 0; i--) {
            m = &mm[i];
            if (!strcmp(m->name, name))
                break;
        }

        if (i >= 0) {
            /* widen the existing entry to cover this mapping as well */
            if (start < m->start)
                m->start = start;
            if (end > m->end)
                m->end = end;
        } else {
            /* first mapping with this name: new entry */
            m = &mm[nmm++];
            m->start = start;
            m->end = end;
            strcpy(m->name, name);
        }
    }

    /* report how many merged entries were produced */
    *nmmp = nmm;
    return 0;
}

2.通过在上面获取到的目标pid进程的内存布局信息中，匹配查找到目标lib库libn，获取目标lib库libn的内存基地址libcaddr并获取保存libn的全路径字符串（此处要查找的是目标pid进程的libc.so库文件的内存基地址和路径字符串）。

/*
 * Find the first named mapping in MM whose file name (the part after the
 * last '/') begins with LIBN, e.g. "libc.".
 *
 * On success the mapping's start address is stored in *START, at most LEN-1
 * characters of its path are stored NUL-terminated in NAME, the whole
 * [start, end) range is passed to mprotect() with read/write/execute
 * protection, and 0 is returned.  If nothing matches, -1 is returned and
 * NAME and START are left untouched.
 *
 * The original "libc.so" / "libc-[0-9]" suffix test was disabled by its
 * author (it was or-ed with 1), so the name prefix alone decides the match.
 */
static int find_libname(char *libn, char *name, int len, unsigned long *start, struct mm *mm, int nmm)
{
    struct mm *hit = NULL;
    int idx;

    for (idx = 0; idx < nmm; idx++) {
        struct mm *cur = &mm[idx];
        char *slash;

        /* unnamed regions can never be a library */
        if (strcmp(cur->name, MEMORY_ONLY) == 0)
            continue;

        /* only entries that look like a path are considered */
        slash = strrchr(cur->name, '/');
        if (slash == NULL)
            continue;

        /* compare the file name with the requested prefix */
        if (strncmp(libn, slash + 1, strlen(libn)) != 0)
            continue;

        hit = cur;
        break;
    }

    if (hit == NULL)
        return -1;      /* not found */

    *start = hit->start;

    /* bounded copy of the path, terminated by hand when it was truncated */
    strncpy(name, hit->name, len);
    if (strlen(hit->name) >= len)
        name[len-1] = '\0';

    /* request read/write/execute on the whole region; result is ignored */
    mprotect((void*)hit->start, hit->end - hit->start, PROT_READ|PROT_WRITE|PROT_EXEC);

    return 0;
}

3.打开查找到的目标lib库文件（路径字符串libc），解析该ELF文件，获取其静态符号表“.symtab”和动态符号表“.dynsym”的信息（此处要解析的是目标pid进程的libc.so库文件的符号表结构体信息），有关ELF文件的解析过程可以仔细分析do_load函数。

/*
 * Open the ELF file FILENAME and load its ".symtab" / ".dynsym" symbol
 * tables via do_load().
 *
 * Returns a newly allocated symbol table, or NULL if the file cannot be
 * opened or parsed.  The table is only allocated once the file has been
 * opened, so a failing open() no longer leaks it.
 */
static symtab_t load_symtab(char *filename)
{
    int fd;
    symtab_t symtab;

    /* open the ELF file */
    fd = open(filename, O_RDONLY);
    if (0 > fd) {
        //perror("open");
        return NULL;
    }

    symtab = (symtab_t) xmalloc(sizeof(*symtab));
    memset(symtab, 0, sizeof(*symtab));

    /* parse the file; on failure nothing has been stored in symtab yet */
    if (0 > do_load(fd, symtab)) {
        printf("Error ELF parsing %s\n", filename);
        free(symtab);
        symtab = NULL;
    }

    close(fd);
    return symtab;
}

4.在目标lib库libn的静态库和动态库的符号表查找被Hook的目标函数的RVA即相对地址偏移（此处为在目标pid进程的libc.so库文件的静态库和动态库的符号表中查找mprotect函数的相对地址偏移）。

//struct symtab {

//  struct symlist *st;    /* "static" symbols */

//  struct symlist *dyn;   /* dynamic symbols */

//};

/*
 * Look NAME up as a symbol of the given ELF symbol TYPE.
 *
 * The dynamic symbol list is searched first and the static one second; a
 * list that was not loaded (NULL) is skipped.  On a hit the symbol value is
 * stored in *VAL by lookup2() and 0 is returned, otherwise -1.
 */
static int lookup_sym(symtab_t s, unsigned char type,
       char *name, unsigned long *val)
{
    int found = -1;

    /* dynamic symbols take precedence */
    if (s->dyn != NULL)
        found = lookup2(s->dyn, type, name, val);

    /* fall back to the static symbols */
    if (found != 0 && s->st != NULL)
        found = lookup2(s->st, type, name, val);

    return (found == 0) ? 0 : -1;
}

5.将获取到的目标pid进程的mprotect函数的RVA和目标pid进程的libc.so库文件的基地址进行相加就得到了目标pid进程中mprotect函数的远程调用地址mprotectaddr。

static int find_name(pid_t pid, char *name, unsigned long *addr)

{

    struct mm mm[1000];

    unsigned long libcaddr;

    int nmm;

    char libc[256];

    symtab_t s;

    // 获取被注入pid进程的so库文件的名称和内存布局起始、结束地址的信息

    if (0 > load_memmap(pid, mm, &nmm)) {

        printf("cannot read memory map\n");

        return -1;

    }

    // 获取被注入pid进程的libc.so库文件的加载基地址以及libc库文件的路径

    if (0 > find_libc(libc, sizeof(libc), &libcaddr, mm, nmm)) {

        printf("cannot find libc\n");

        return -1;

    }

    // 打开查找到的lib目标库文件（路径字符串libc）解析该Elf文件

    // 获取该lib库文件的静态库和动态库的符号表信息".symtab"或者".dynsym".

    s = load_symtab(libc);

    if (!s) {

        printf("cannot read symbol table\n");

        return -1;

    }

    // 在当前进程加载的libc库文件中查找导出的 name名称函数的相对偏移RVA

    // 这个地方，有更好获取mprotect函数调用地址的方法，作者后面也用到了，不知道为什么要用这么复杂的方法?

    if (0 > lookup_func_sym(s, name, addr)) {

        printf("cannot find %s\n", name);

        return -1;

    }

    // 获取得到目标pid中 name 名称的指定函数的远程调用地址

    *addr += libcaddr;

    return 0;

}

（2）.根据so动态库的加载原理，获取到目标pid进程中dlopen函数的远程调用地址。注意下面的代码并没有使用libdl.so的基地址，而是通过find_linker分别获取本地进程和目标pid进程中"/system/bin/linker"的基地址来进行换算（本地进程dlopen函数的调用地址 - 本地进程linker的基地址 = 远程目标pid进程dlopen函数的调用地址 - 远程目标pid进程linker的基地址）。

    // 加载动态库文件"/system/lib/libdl.so"

    // 甚至获取当前进程中dlopen函数的调用地址，这一步不是必须这么去做的

    void *ldl = dlopen("libdl.so", RTLD_LAZY);

    if (ldl) {

        // 获取当前进程中，dlopen函数的调用地址

        dlopenaddr = (unsigned long)dlsym(ldl, "dlopen");

        dlclose(ldl);

    }

    unsigned long int lkaddr;

    unsigned long int lkaddr2;

    // 获取当前进程中的"/system/bin/linker"的基地址

    find_linker(getpid(), &lkaddr);

    //printf("own linker: 0x%x\n", lkaddr);

    //printf("offset %x\n", dlopenaddr - lkaddr);

    // 获取被注入的pid进程中"/system/bin/linker"的基地址

    find_linker(pid, &lkaddr2);

    //printf("tgt linker: %x\n", lkaddr2);

    // 获取被注入的目标pid进程中函数dlopen的远程调用地址

    dlopenaddr = lkaddr2 + (dlopenaddr - lkaddr);

    //printf("tgt dlopen : %x\n", lkaddr2 + (dlopenaddr - lkaddr));

    if (debug)

        printf("dlopen: 0x%lx\n", dlopenaddr);

（3）.ptrace附加到被注入的目标pid进程上，获取此时目标pid进程寄存器环境的状态值。

    // ptrace附加到被注入的目标pid进程中

    if (0 > ptrace(PTRACE_ATTACH, pid, 0, 0)) {

        printf("cannot attach to %d, error!\n", pid);

        exit(1);

    }

    // 等待附加到目标pid进程完成

    waitpid(pid, NULL, 0);

    ......

    // 格式化得到字符串"/proc/pid/mem"

    sprintf(buf, "/proc/%d/mem", pid);

    // 获取被注入的目标pid进程内存中的内容

    fd = open(buf, O_WRONLY);

    if (0 > fd) {

        printf("cannot open %s, error!\n", buf);

        exit(1);

    }

    // 获取目标pid进程中此时所有寄存器的状态值

    ptrace(PTRACE_GETREGS, pid, 0, &regs);

（4）.通过抬高栈顶，减小获取到的目标pid进程的SP寄存器的值，实现在目标pid进程中申请内存空间。将需要加载的so库文件的路径字符串写入到申请的内存空间中，还将在目标pid进程中实现远程调用dlopen函数加载so库文件的shellcode代码写入到申请的内存空间中。

// Signature of the function being called: void *dlopen(const char *pathname, int mode);

// In ARM state a pc-relative access sees pc = address of the current instruction + 8.

// codeaddr is the address in the target at which this array is written and executed.

// Words 0..10 are code; words 11..19 are data slots (encoded as nops) that are
// filled in before the array is written: sc[11..17] = saved r0, r1, r2, r3, lr, pc, sp,
// sc[18] = address of the library path string, sc[19] = address of dlopen.

unsigned int sc[] = {

0xe59f0040, //        ldr     r0, [pc, #64]   ; 0x48 = sc[18] --> r0 = address of the library path string

0xe3a01000, //        mov     r1, #0  ; 0x0                    --> r1 = 0, the mode argument of dlopen

0xe1a0e00f, //        mov     lr, pc                           --> return address = the "ldr sp" instruction below

0xe59ff038, //        ldr     pc, [pc, #56]   ; 0x4c = sc[19] --> jump to dlopen, i.e. call dlopen(path, 0)

0xe59fd02c, //        ldr     sp, [pc, #44]   ; 0x44 = sc[17] --> start restoring the saved registers: sp

0xe59f0010, //        ldr     r0, [pc, #16]   ; 0x2c = sc[11] --> restore r0 (only r0-r3, lr, pc and sp are restored)

0xe59f1010, //        ldr     r1, [pc, #16]   ; 0x30 = sc[12] --> restore r1

0xe59f2010, //        ldr     r2, [pc, #16]   ; 0x34 = sc[13] --> restore r2

0xe59f3010, //        ldr     r3, [pc, #16]   ; 0x38 = sc[14] --> restore r3

0xe59fe010, //        ldr     lr, [pc, #16]   ; 0x3c = sc[15] --> restore lr

0xe59ff010, //        ldr     pc, [pc, #16]   ; 0x40 = sc[16] --> restore pc, resuming the interrupted code

0xe1a00000, //        nop                     slot: saved r0

0xe1a00000, //        nop                     slot: saved r1

0xe1a00000, //        nop                     slot: saved r2

0xe1a00000, //        nop                     slot: saved r3

0xe1a00000, //        nop                     slot: saved lr

0xe1a00000, //        nop                     slot: saved pc

0xe1a00000, //        nop                     slot: saved sp

0xe1a00000, //        nop                     slot: addr of libname

0xe1a00000, //        nop                     slot: dlopenaddr

};

    // setup variables of the loading and fixup code

    /*

    sc[9] = regs.ARM_r0;

    sc[10] = regs.ARM_r1;

    sc[11] = regs.ARM_lr;

    sc[12] = regs.ARM_pc;

    sc[13] = regs.ARM_sp;

    sc[15] = dlopenaddr;

    */

    // 保存目标pid进程此时所有寄存器的值（保存当前执行环境，用于还原）

    sc[11] = regs.ARM_r0;

    sc[12] = regs.ARM_r1;

    sc[13] = regs.ARM_r2;

    sc[14] = regs.ARM_r3;

    sc[15] = regs.ARM_lr;

    sc[16] = regs.ARM_pc;

    sc[17] = regs.ARM_sp;

    sc[19] = dlopenaddr;

    // 打印日志消息

    if (debug) {

        printf("pc=%lx lr=%lx sp=%lx fp=%lx\n", regs.ARM_pc, regs.ARM_lr, regs.ARM_sp, regs.ARM_fp);

        printf("r0=%lx r1=%lx\n", regs.ARM_r0, regs.ARM_r1);

        printf("r2=%lx r3=%lx\n", regs.ARM_r2, regs.ARM_r3);

    }

    // 在目标pid进程中，抬高栈顶分配内存空间用于存放需要加载的动态库文件

    libaddr = regs.ARM_sp - n*4 - sizeof(sc);

    // 保存加载到目标pid进程中的so库文件的路径字符串的指针

    sc[18] = libaddr;

    //sc[14] = libaddr;

    //printf("libaddr: %x\n", libaddr);

    if (stack_start == 0) {

        stack_start = (unsigned long int) strtol(argv[3], NULL, 16);

        stack_start = stack_start << 12;

        stack_end = stack_start + strtol(argv[4], NULL, 0);

    }

    if (debug)

        printf("stack: 0x%x-0x%x leng = %d\n", stack_start, stack_end, stack_end-stack_start);

    // 将需要加载的so库文件的路径字符串写入到目标pid进程的内存地址libaddr处

    if (0 > write_mem(pid, (unsigned long*)arg, n, libaddr)) {

        printf("cannot write library name (%s) to stack, error!\n", arg);

        exit(1);

    }

    // 在目标pid进程中，再次抬高栈顶用于存放执行的shellcode

    codeaddr = regs.ARM_sp - sizeof(sc);

    // 将shellcode代码src写入到目标pid进程的内存地址codeaddr处

    if (0 > write_mem(pid, (unsigned long*)&sc, sizeof(sc)/sizeof(long), codeaddr)) {

        printf("cannot write code, error!\n");

        exit(1);

    }

    if (debug)

        printf("executing injection code at 0x%lx\n", codeaddr);

    // 修改目标pid进程中的栈指针regs.ARM_sp的值

    regs.ARM_sp = regs.ARM_sp - n*4 - sizeof(sc);

（5）.在目标pid进程中实现远程调用dlopen函数加载so库文件的shellcode代码不是很好理解，博主Roland_Sun特此画了一张图，感觉还不错，拿来学习一下，顺便将作者的分析思路也一并摘过来，感谢作者Roland_Sun。

对于ARM处理器来说，pc寄存器的值指向的不是当前正在执行指令的地址，而是往下第二条指令的地址。

开始分析shellcode代码的含义，指令将从codeaddr指示的位置从低到高依次执行。

1. 第一条指令将pc寄存器的值加上64，读出那个地方的内容（4个字节），然后放到寄存器r0中。刚才说过了，pc寄存器值指向的是当前指令位置加8个字节，也就是说这条指令实际读出的是当前指令位置向下72个字节。由于sc数组是int型的，就是数组当前元素位置向下18个元素处。数一数，刚好是libaddr的位置。所以这条指令是为了让r0寄存器指向.so共享库路径名字符串。

2. 第二条指令很简单，是将0赋值给寄存器r1。

3. 第三条指令用来将pc寄存器值保存到lr寄存器中，这样做的目的是为了调用dlopen()函数返回后，跳转到指令“ldr sp, [pc, #44]”处执行。

4. 第四条指令是将pc加上56处的数值加载到pc中，pc+56处是哪？当前指令位置往下64字节，16个元素，刚好是dlopen()函数的调用地址。所以，这条指令其实就是调用dlopen()函数，传入的参数一个是r0寄存器指向的共享库路径名，另一个是r1寄存器中的0。

（6）.修改获取到的目标pid进程的R0、R1、R2寄存器的值为mprotect函数被调用时的参数值，修改获取到的目标pid进程的PC寄存器值为前面获取到的目标pid进程mprotect函数的远程调用地址mprotectaddr，修改获取到的目标pid进程的LR寄存器值为shellcode的地址codeaddr。

    // 设置mprotect函数的第1个参数为目标pid进程中栈的起始地址

    regs.ARM_r0 = stack_start; // want to make stack executable

    //printf("r0 %x\n", regs.ARM_r0);

    // 设置mprotect函数的第2个参数为目标pid进程中整个栈大小

    regs.ARM_r1 = stack_end - stack_start; // stack size

    //printf("mprotect(%x, %d, ALL)\n", regs.ARM_r0, regs.ARM_r1);

    // 设置mprotect函数的第3个参数为目标pid进程中栈被修改为可读可写可执行

    regs.ARM_r2 = PROT_READ|PROT_WRITE|PROT_EXEC; // protections

    // normal mode, first call mprotect

    if (nomprotect == 0) {

        if (debug)

            printf("calling mprotect\n");

        // 在目标pid进程中调用完mprotect函数之后，函数返回调用写入的关键shellcode代码

        regs.ARM_lr = codeaddr; // points to loading and fixing code

        // 在目标pid进程中调用mprotect函数将目标pid进程整个栈修改为可读可写可执行

        regs.ARM_pc = mprotectaddr; // execute mprotect()

    } else {

        // no need to execute mprotect on old Android versions

        // 直接调用shellcode，不需要修改内存属性（可以删除）

        regs.ARM_pc = codeaddr;

    }

（7）.重新设置目标pid进程的寄存器环境的状态值（已经被我们修改了），释放附加的目标pid进程让其继续执行；由于目标pid进程的寄存器状态被我们改变了，因此目标pid进程继续执行会先调用mprotect函数将shellcode所在栈内存的属性改为可读可写可执行，然后在mprotect函数返回时，在目标pid进程中调用dlopen函数加载so库文件的shellcode代码也得以执行，从而实现Android跨进程的so注入。

    // 设置目标pid进程的所有寄存器的值（即设置好目标pid进程中mprotect函数的参数及调用）

    ptrace(PTRACE_SETREGS, pid, 0, &regs);

    // 让目标pid进程继续执行即让目标pid进程先执行mprotect函数然后执行shellcode代码实现so动态库文件的注入

    ptrace(PTRACE_DETACH, pid, 0, (void *)SIGCONT);

    // 打印日志消息

    if (debug)

        printf("library injection completed!\n");

（8）.注入工具hijack实现思路的总结

作者在获取目标pid进程mprotect函数的远程调用地址的思路为我们跨进程获取远程目标pid进程的目标函数的调用地址提供了另外一种思路：先通过/proc/pid/maps获取到目标pid进程的目标函数所在的so库文件的基地址和so库文件的路径，然后解析目标pid进程的目标函数所在的so库文件，获取到该so库文件的系统静态符号表和系统动态符号表，查找到目标函数的调用地址的RVA，该目标函数的RVA和该so库文件的内存基地址相加即为目标pid进程的目标函数的远程调用地址VA。
ptrace附加目标pid进程，减小目标pid进程的SP寄存器的值，实现在目标pid进程中申请内存空间，写入shellcode的思路也不错。
修改ptrace附加目标pid进程时寄存器R0、R1、R2的状态值为mprotect函数的参数值，PC寄存器的值为mprotect函数的调用地址，然后mprotect函数返回调用shellcode实现dlopen加载so库文件的思路也是很赞的。
借助ptrace释放进程时目标pid进程寄存器环境的恢复来先执行mprotect函数修改栈内存为可读可写可执行，为shellcode的顺利执行做准备，然后函数mprotect返回执行shellcode代码，比较巧妙。唯独不足就是，目标pid进程被附加后寄存器环境的恢复稍有不足，不过应该影响不大。

四、注入工具hijack代码

源码文件 hijack.c

/*

 * hijack.c - force a process to load a library

 *

 *  ARM / Android version by:

 *  Collin Mulliner <collin[at]mulliner.org>

 *  http://www.mulliner.org/android/

 *  (c) 2012,2013

 *

 *

 *  original x86 version by:

 *  Copyright (C) 2002 Victor Zandy <zandy[at]cs.wisc.edu>

 *

 *  License: LGPL 2.1

 *

 */

#define _XOPEN_SOURCE 500  /* include pread,pwrite */

#define _GNU_SOURCE

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <ctype.h>

#include <fcntl.h>

#include <sys/ptrace.h>

#include <sys/types.h>

#include <sys/wait.h>

#include <sys/stat.h>

#include <dlfcn.h>

#include <elf.h>

#include <unistd.h>

#include <errno.h>

#include <sys/mman.h>

/* non-zero enables the diagnostic printf output */
int debug = 0;

/* NOTE(review): no use of this flag is visible in this part of the file */
int zygote = 0;

/* 0: run mprotect() in the target before the injected code; non-zero: skip it */
int nomprotect = 0;

/* start of the target's stack region; set by load_memmap() from a mapping
   whose name contains "stack" */
unsigned int stack_start;

/* end of that stack region */
unsigned int stack_end;

/* memory map for libraries */

/* size of the name buffer in struct mm, including the terminating NUL */
#define MAX_NAME_LEN 256

// name given to mappings that have no name of their own

#define MEMORY_ONLY  "[memory]"

/* one merged entry of a process memory map */
struct mm {

    // name of the mapping (a file path, or MEMORY_ONLY)

    char name[MAX_NAME_LEN];

    // lowest start and highest end address of the mappings with this name

    unsigned long start, end;

};

// symbol table data loaded from an ELF file

typedef struct symtab *symtab_t;

struct symlist {

    Elf32_Sym *sym;       /* symbols */

    char *str;            /* symbol strings */

    unsigned num;         /* number of symbols */

};

struct symtab {

    struct symlist *st;    /* "static" symbols */

    struct symlist *dyn;   /* dynamic symbols */

};

/*
 * malloc() wrapper that never returns NULL: when the allocation fails it
 * prints "Out of memory" and terminates the process with exit status 1.
 */
static void *
xmalloc(size_t size)
{
    void *mem = malloc(size);

    if (mem != NULL)
        return mem;

    printf("Out of memory\n");
    exit(1);
}

/*
 * Load one symbol table and its string table from the ELF file open on FD.
 *
 * SYMH is the section header of the symbol table and STRH the section header
 * of the matching string table.  Returns a newly allocated symlist, or NULL
 * if the section size is not a multiple of the symbol size or a read fails.
 *
 * Everything allocated here is released again on failure (it used to be
 * leaked), and the string table gets one extra terminating NUL byte so that
 * string functions applied to it cannot run past the end of the buffer.
 */
static struct symlist *
get_syms(int fd, Elf32_Shdr *symh, Elf32_Shdr *strh)
{
    struct symlist *sl;
    ssize_t rv;

    sl = (struct symlist *) xmalloc(sizeof(struct symlist));
    sl->str = NULL;
    sl->sym = NULL;
    sl->num = 0;

    /* sanity */
    if (symh->sh_size % sizeof(Elf32_Sym)) {
        printf("elf_error\n");
        goto fail;
    }

    /* symbol table */
    sl->num = symh->sh_size / sizeof(Elf32_Sym);
    sl->sym = (Elf32_Sym *) xmalloc(symh->sh_size);
    rv = pread(fd, sl->sym, symh->sh_size, symh->sh_offset);
    if (0 > rv) {
        //perror("read");
        goto fail;
    }
    if ((size_t)rv != symh->sh_size) {
        printf("elf error\n");
        goto fail;
    }

    /* string table, with room for a terminator of our own */
    sl->str = (char *) xmalloc((size_t)strh->sh_size + 1);
    rv = pread(fd, sl->str, strh->sh_size, strh->sh_offset);
    if (0 > rv) {
        //perror("read");
        goto fail;
    }
    if ((size_t)rv != strh->sh_size) {
        printf("elf error");
        goto fail;
    }
    sl->str[strh->sh_size] = '\0';

    return sl;

fail:
    /* free(NULL) is a no-op, so partially filled lists are handled too */
    free(sl->str);
    free(sl->sym);
    free(sl);
    return NULL;
}

/*
 * Parse the 32-bit ELF file open on FD and load its symbol tables.
 *
 * The section header table is scanned for SHT_SYMTAB / SHT_DYNSYM sections
 * and for the string tables whose names start with ".strtab" / ".dynstr".
 * Each symbol table that is present is loaded with get_syms() and stored in
 * SYMTAB->st (static) or SYMTAB->dyn (dynamic).
 *
 * Returns 0 when the headers were parsed and at least one symbol table was
 * located, -1 otherwise.  Nothing is stored in SYMTAB when -1 is returned.
 *
 * Compared with the previous version, values taken from the file are
 * validated before they are used as an index (e_shstrndx) or as an offset
 * into the section-name string table (sh_name), and that string table is
 * always NUL-terminated.
 */
static int do_load(int fd, symtab_t symtab)
{
    ssize_t rv;
    size_t size;
    size_t names_size;
    Elf32_Ehdr ehdr;
    Elf32_Shdr *shdr = NULL, *p;
    Elf32_Shdr *dynsymh, *dynstrh;
    Elf32_Shdr *symh, *strh;
    char *shstrtab = NULL;
    int i;
    int ret = -1;

    /* elf header */
    rv = read(fd, &ehdr, sizeof(ehdr));
    if (0 > rv) {
        //perror("read");
        goto out;
    }
    if ((size_t)rv != sizeof(ehdr)) {
        printf("elf error\n");
        goto out;
    }
    if (memcmp(ELFMAG, ehdr.e_ident, SELFMAG)) { /* sanity */
        printf("not an elf\n");
        goto out;
    }
    if (sizeof(Elf32_Shdr) != ehdr.e_shentsize) { /* sanity */
        printf("elf error\n");
        goto out;
    }
    /* the section-name table index must refer to an existing section */
    if (ehdr.e_shstrndx >= ehdr.e_shnum) {
        printf("elf error\n");
        goto out;
    }

    /* section header table */
    size = (size_t)ehdr.e_shentsize * ehdr.e_shnum;
    shdr = (Elf32_Shdr *) xmalloc(size);
    rv = pread(fd, shdr, size, ehdr.e_shoff);
    if (0 > rv) {
        //perror("read");
        goto out;
    }
    if ((size_t)rv != size) {
        printf("elf error");
        goto out;
    }

    /* section header string table, plus a terminator of our own */
    names_size = shdr[ehdr.e_shstrndx].sh_size;
    shstrtab = (char *) xmalloc(names_size + 1);
    rv = pread(fd, shstrtab, names_size, shdr[ehdr.e_shstrndx].sh_offset);
    if (0 > rv) {
        //perror("read");
        goto out;
    }
    if ((size_t)rv != names_size) {
        printf("elf error\n");
        goto out;
    }
    shstrtab[names_size] = '\0';

    /* symbol table headers */
    symh = dynsymh = NULL;
    strh = dynstrh = NULL;
    for (i = 0, p = shdr; i < ehdr.e_shnum; i++, p++) {
        if (SHT_SYMTAB == p->sh_type) {
            if (symh) {
                printf("too many symbol tables\n");
                goto out;
            }
            symh = p;
        } else if (SHT_DYNSYM == p->sh_type) {
            if (dynsymh) {
                printf("too many symbol tables\n");
                goto out;
            }
            dynsymh = p;
        } else if (SHT_STRTAB == p->sh_type && p->sh_name < names_size) {
            /* the name offset is inside the table, so it is safe to read */
            const char *secname = shstrtab + p->sh_name;

            if (!strncmp(secname, ".strtab", 7)) {
                if (strh) {
                    printf("too many string tables\n");
                    goto out;
                }
                strh = p;
            } else if (!strncmp(secname, ".dynstr", 7)) {
                if (dynstrh) {
                    printf("too many string tables\n");
                    goto out;
                }
                dynstrh = p;
            }
        }
    }

    /* sanity checks: a symbol table and its string table come in pairs */
    if ((!dynsymh && dynstrh) || (dynsymh && !dynstrh)) {
        printf("bad dynamic symbol table");
        goto out;
    }
    if ((!symh && strh) || (symh && !strh)) {
        printf("bad symbol table");
        goto out;
    }
    if (!dynsymh && !symh) {
        printf("no symbol table");
        goto out;
    }

    /* symbol tables; a table that fails to load is left as NULL */
    if (dynsymh)
        symtab->dyn = get_syms(fd, dynsymh, dynstrh);
    if (symh)
        symtab->st = get_syms(fd, symh, strh);
    ret = 0;

out:
    free(shstrtab);
    free(shdr);
    return ret;
}

/*
 * Open the ELF file FILENAME and load its ".symtab" / ".dynsym" symbol
 * tables via do_load().
 *
 * Returns a newly allocated symbol table, or NULL if the file cannot be
 * opened or parsed.  The table is only allocated once the file has been
 * opened, so a failing open() no longer leaks it.
 */
static symtab_t load_symtab(char *filename)
{
    int fd;
    symtab_t symtab;

    /* open the ELF file */
    fd = open(filename, O_RDONLY);
    if (0 > fd) {
        //perror("open");
        return NULL;
    }

    symtab = (symtab_t) xmalloc(sizeof(*symtab));
    memset(symtab, 0, sizeof(*symtab));

    /* parse the file; on failure nothing has been stored in symtab yet */
    if (0 > do_load(fd, symtab)) {
        printf("Error ELF parsing %s\n", filename);
        free(symtab);
        symtab = NULL;
    }

    close(fd);
    return symtab;
}

/*
 * Read /proc/<pid>/maps and fill MM with one entry per distinct mapping name.
 *
 * Mappings that share a name are merged into a single entry spanning the
 * lowest start and the highest end seen; every unnamed mapping gets its own
 * entry called MEMORY_ONLY.  The number of entries written is stored in *NMMP.
 *
 * Side effect: for every named mapping whose name contains "stack" the
 * globals stack_start / stack_end are set to that mapping's range.
 *
 * Returns 0 on success and -1 if the maps file cannot be opened or read, or
 * does not fit into the local buffer.
 *
 * NOTE(review): the function has no way to know the capacity of MM; the
 * caller must pass an array large enough for every distinct mapping.
 */
static int
load_memmap(pid_t pid, struct mm *mm, int *nmmp)
{
    char raw[80000]; // this depends on the number of libraries an executable uses
    char name[MAX_NAME_LEN];
    char fmt[64];
    char *p;
    unsigned long start, end;
    struct mm *m;
    int nmm = 0;
    int fd, rv;
    int i;

    /* raw doubles as scratch space for the "/proc/<pid>/maps" path */
    sprintf(raw, "/proc/%d/maps", pid);
    fd = open(raw, O_RDONLY);
    if (0 > fd) {
        printf("Can't open %s for reading\n", raw);
        return -1;
    }

    /* Zero to ensure data is null terminated */
    memset(raw, 0, sizeof(raw));

    /* read() may return short counts, so keep reading until EOF */
    p = raw;
    while (1) {
        rv = read(fd, p, sizeof(raw)-(p-raw));
        if (0 > rv) {
            //perror("read");
            close(fd);      /* do not leak the descriptor on error */
            return -1;
        }
        if (0 == rv)
            break;
        p += rv;
        /* a completely full buffer has no terminator left: give up */
        if ((size_t)(p-raw) >= sizeof(raw)) {
            printf("Too many memory mapping\n");
            close(fd);
            return -1;
        }
    }
    close(fd);

    /*
     * The name conversion is bounded to MAX_NAME_LEN-1 characters so that an
     * overlong path cannot overflow `name`, and the addresses are parsed
     * without a width limit so that they are not cut off after 8 digits.
     */
    snprintf(fmt, sizeof(fmt), "%%lx-%%lx %%*s %%*s %%*s %%*s %%%ds",
             MAX_NAME_LEN - 1);

    for (p = strtok(raw, "\n"); p; p = strtok(NULL, "\n")) {
        /* parse current map line */
        rv = sscanf(p, fmt, &start, &end, name);

        /* malformed line: start/end were not both assigned, skip it */
        if (rv < 2)
            continue;

        /* only the two addresses matched: an unnamed mapping */
        if (rv == 2) {
            m = &mm[nmm++];
            m->start = start;
            m->end = end;
            strcpy(m->name, MEMORY_ONLY);
            continue;
        }

        /* remember the range of the stack mapping for later use */
        if (strstr(name, "stack") != 0) {
            stack_start = start;
            stack_end = end;
        }

        /* search backward for other mapping with same name */
        for (i = nmm-1; i >= 0; i--) {
            m = &mm[i];
            if (!strcmp(m->name, name))
                break;
        }

        if (i >= 0) {
            /* widen the existing entry to cover this mapping as well */
            if (start < m->start)
                m->start = start;
            if (end > m->end)
                m->end = end;
        } else {
            /* new entry */
            m = &mm[nmm++];
            m->start = start;
            m->end = end;
            strcpy(m->name, name);
        }
    }

    *nmmp = nmm;
    return 0;
}

/* Find libc in MM, storing no more than LEN-1 chars of
   its name in NAME and set START to its starting
   address.  If libc cannot be found return -1 and
   leave NAME and START untouched.  Otherwise return 0
   and null-terminated NAME.

   An entry matches when the part of its name after the
   last '/' is "libc" followed by ".so" or by '-' and a
   digit.  NAME is only written when LEN is positive. */
static int
find_libc(char *name, int len, unsigned long *start,
      struct mm *mm, int nmm)
{
    int i;
    struct mm *m;
    char *p;

    for (i = 0, m = mm; i < nmm; i++, m++) {
        /* unnamed regions cannot be a library */
        if (!strcmp(m->name, MEMORY_ONLY))
            continue;

        /* only entries that look like a path are considered */
        p = strrchr(m->name, '/');
        if (!p)
            continue;
        p++;

        if (strncmp("libc", p, 4))
            continue;
        p += 4;

        /* here comes our crude test -> 'libc.so' or 'libc-[0-9]'
           (isdigit() needs an unsigned char value) */
        if (!strncmp(".so", p, 3)
            || (p[0] == '-' && isdigit((unsigned char)p[1])))
            break;
    }

    if (i >= nmm)
        /* not found */
        return -1;

    *start = m->start;

    /* bounded copy that always terminates and never writes when len <= 0 */
    if (len > 0)
        snprintf(name, (size_t)len, "%s", m->name);

    return 0;
}

/* Find the dynamic linker in MM: the first named entry
   whose file name (the part after the last '/') starts
   with "linker".  No more than LEN-1 chars of its name
   are stored in NAME (null-terminated, only written when
   LEN is positive) and START is set to its starting
   address.  Returns 0 on success; returns -1 and leaves
   NAME and START untouched when no entry matches.

   The suffix test copied from find_libc sat behind an
   unconditional break and could never run; it has been
   removed. */
static int
find_linker_mem(char *name, int len, unsigned long *start,
      struct mm *mm, int nmm)
{
    int i;
    struct mm *m;
    char *p;

    for (i = 0, m = mm; i < nmm; i++, m++) {
        /* unnamed regions cannot be the linker */
        if (!strcmp(m->name, MEMORY_ONLY))
            continue;

        /* only entries that look like a path are considered */
        p = strrchr(m->name, '/');
        if (!p)
            continue;
        p++;

        /* the name prefix alone decides the match */
        if (!strncmp("linker", p, 6))
            break;
    }

    if (i >= nmm)
        /* not found */
        return -1;

    *start = m->start;

    /* bounded copy that always terminates and never writes when len <= 0 */
    if (len > 0)
        snprintf(name, (size_t)len, "%s", m->name);

    return 0;
}

/*
 * Search the symbol list SL for a symbol called NAME of ELF symbol TYPE.
 *
 * On a hit the symbol's st_value is stored in *VAL and 0 is returned;
 * otherwise -1 is returned and *VAL is left untouched.
 *
 * The name must match exactly.  The previous version compared only the
 * first strlen(NAME) characters, so a longer symbol that merely started
 * with NAME could be returned instead of the requested one.
 */
static int
lookup2(struct symlist *sl, unsigned char type,
    char *name, unsigned long *val)
{
    Elf32_Sym *p;
    unsigned i;

    for (i = 0, p = sl->sym; i < sl->num; i++, p++) {
        /* cheap test first: wrong symbol type */
        if (ELF32_ST_TYPE(p->st_info) != type)
            continue;

        /* st_name is the offset of the name in the string table */
        if (strcmp(sl->str + p->st_name, name) != 0)
            continue;

        *val = p->st_value;
        return 0;
    }

    return -1;
}

/*
 * Look NAME up as a symbol of the given ELF symbol TYPE.
 *
 * The dynamic symbol list is searched first and the static one second; a
 * list that was not loaded (NULL) is skipped.  On a hit the symbol value is
 * stored in *VAL by lookup2() and 0 is returned, otherwise -1.
 */
static int lookup_sym(symtab_t s, unsigned char type,
       char *name, unsigned long *val)
{
    int found = -1;

    /* dynamic symbols take precedence */
    if (s->dyn != NULL)
        found = lookup2(s->dyn, type, name, val);

    /* fall back to the static symbols */
    if (found != 0 && s->st != NULL)
        found = lookup2(s->st, type, name, val);

    return (found == 0) ? 0 : -1;
}

/*
 * Look NAME up among the function symbols (STT_FUNC) of S.
 * Returns what lookup_sym() returns: 0 with *VAL set on a hit, -1 otherwise.
 */
static int lookup_func_sym(symtab_t s, char *name, unsigned long *val)
{
    const unsigned char wanted = STT_FUNC;
    int rc = lookup_sym(s, wanted, name, val);

    return rc;
}

static int find_name(pid_t pid, char *name, unsigned long *addr)

{

    struct mm mm[1000];

    unsigned long libcaddr;

    int nmm;

    char libc[256];

    symtab_t s;

    // 获取被注入pid进程的so库文件的名称和内存布局起始、结束地址的信息

    if (0 > load_memmap(pid, mm, &nmm)) {

        printf("cannot read memory map\n");

        return -1;

    }

    // 获取被注入pid进程的libc.so库文件的加载基地址以及libc库文件的路径

    if (0 > find_libc(libc, sizeof(libc), &libcaddr, mm, nmm)) {

        printf("cannot find libc\n");

        return -1;

    }

    // 打开查找到的lib目标库文件（路径字符串libc）解析该Elf文件

    // 获取该lib库文件的静态库和动态库的符号表信息".symtab"或者".dynsym".

    s = load_symtab(libc);

    if (!s) {

        printf("cannot read symbol table\n");

        return -1;

    }

    // 在当前进程加载的libc库文件中查找导出的 name名称函数的相对偏移RVA

    // 这个地方，有更好获取mprotect函数调用地址的方法，作者后面也用到了，不知道为什么要用这么复杂的方法?

    if (0 > lookup_func_sym(s, name, addr)) {

        printf("cannot find %s\n", name);

        return -1;

    }

    // 获取得到目标pid中 name 名称的指定函数的远程调用地址

    *addr += libcaddr;

    return 0;

}

/*
 * Store in *ADDR the lowest start address of the mapping whose file name
 * starts with "linker" in process PID.
 *
 * Returns 1 on success (kept as is for existing callers, although the
 * sibling functions return 0) and -1 if the memory map cannot be read or no
 * such mapping exists; *ADDR is left untouched on failure.
 */
static int find_linker(pid_t pid, unsigned long *addr)
{
    struct mm mm[1000];
    unsigned long base;
    int nmm;
    char path[256];

    /* names and address ranges of everything mapped into PID */
    if (0 > load_memmap(pid, mm, &nmm)) {
        printf("cannot read memory map\n");
        return -1;
    }

    /* the path is required by the helper but not needed here */
    if (0 > find_linker_mem(path, sizeof(path), &base, mm, nmm)) {
        printf("cannot find linker\n");
        return -1;
    }

    *addr = base;
    return 1;
}

/* Copy NLONG words from BUF into the memory of PID, one
   PTRACE_POKETEXT request per word.  Word number i is taken
   from BUF[i] and written to address POS + 4*i.  The calling
   process must be attached to PID.  Returns 0 when every
   request succeeded (also when NLONG <= 0) and -1 as soon as
   one of them fails. */
static int
write_mem(pid_t pid, unsigned long *buf, int nlong, unsigned long pos)
{
    int idx = 0;

    while (idx < nlong) {
        void *dst = (void *)(pos + (idx * 4));
        void *word = (void *)buf[idx];

        if (ptrace(PTRACE_POKETEXT, pid, dst, word) < 0)
            return -1;
        idx++;
    }

    return 0;
}

/* Read NLONG words from the memory of PID into BUF, one
   PTRACE_PEEKTEXT request per word.  Word number i is read
   from address POS + 4*i and stored in BUF[i].  The calling
   process must be attached to PID.  Returns 0 on success
   (also when NLONG <= 0) and -1 as soon as a request fails;
   the word that failed is not written to BUF.

   PTRACE_PEEKTEXT returns the data itself, so -1 can be a
   valid word: a failure is recognised by errno being set.
   The previous version compared an unsigned value with 0 and
   therefore never reported an error. */
static int
read_mem(pid_t pid, unsigned long *buf, int nlong, unsigned long pos)
{
    int i;

    for (i = 0; i < nlong; i++) {
        long word;

        errno = 0;
        word = ptrace(PTRACE_PEEKTEXT, pid, (void *)(pos+(i*4)), NULL);
        if (word == -1 && errno != 0)
            return -1;

        buf[i] = (unsigned long)word;
    }

    return 0;
}

// Older variant of the injected stub; according to the original note it is not used.
// It restores only r0, r1, lr, pc and sp. In ARM state a pc-relative load sees
// pc = address of the current instruction + 8. Words 9..15 are data slots.

unsigned int sc_old[] = {

// libname

0xe59f0030, // ldr     r0, [pc, #48] | addr of "libname" in r0 (slot 14)

0xe3a01000, // mov     r1, #0        | r1 = 0 (flags=0)

0xe1a0e00f, // mov     lr, pc        | populate lr (return to "fix sp" below)

0xe59ff028, // ldr     pc, [pc, #40] | call dlopen() (slot 15)

0xe59fd01c, // ldr     sp, [pc, #28] | fix sp (slot 13)

0xe59f0008, // ldr     r0, [pc, #8]  | fix r0 (slot 9)

0xe59f1008, // ldr     r1, [pc, #8]  | fix r1 (slot 10)

0xe59fe008, // ldr     lr, [pc, #8]  | fix lr (slot 11)

0xe59ff008, // ldr     pc, [pc, #8]  | fix pc (continue process) (slot 12)

0xe1a00000, // nop (mov r0,r0)       | r0

0xe1a00000, // nop (mov r0,r0)       | r1

0xe1a00000, // nop (mov r0,r0)       | lr

0xe1a00000, // nop (mov r0,r0)       | pc

0xe1a00000, // nop (mov r0,r0)       | sp

0xe1a00000, // nop (mov r0,r0)       | addr of libname

0xe1a00000  // nop (mov r0,r0)       | dlopen address

};

// Signature of the function being called: void *dlopen(const char *pathname, int mode);

// In ARM state a pc-relative access sees pc = address of the current instruction + 8.

// codeaddr is the address in the target at which this array is written and executed.

// Words 0..10 are code; words 11..19 are data slots (encoded as nops) that are
// filled in before the array is written: sc[11..17] = saved r0, r1, r2, r3, lr, pc, sp,
// sc[18] = address of the library path string, sc[19] = address of dlopen.

unsigned int sc[] = {

0xe59f0040, //        ldr     r0, [pc, #64]   ; 0x48 = sc[18] --> r0 = address of the library path string

0xe3a01000, //        mov     r1, #0  ; 0x0                    --> r1 = 0, the mode argument of dlopen

0xe1a0e00f, //        mov     lr, pc                           --> return address = the "ldr sp" instruction below, where sp, r0-r3, lr and pc are restored

0xe59ff038, //        ldr     pc, [pc, #56]   ; 0x4c = sc[19] --> jump to dlopen, i.e. call dlopen(path, 0)

0xe59fd02c, //        ldr     sp, [pc, #44]   ; 0x44 = sc[17] --> start restoring the saved registers: sp

0xe59f0010, //        ldr     r0, [pc, #16]   ; 0x2c = sc[11] --> restore r0 (only r0-r3, lr, pc and sp are restored)

0xe59f1010, //        ldr     r1, [pc, #16]   ; 0x30 = sc[12] --> restore r1

0xe59f2010, //        ldr     r2, [pc, #16]   ; 0x34 = sc[13] --> restore r2

0xe59f3010, //        ldr     r3, [pc, #16]   ; 0x38 = sc[14] --> restore r3

0xe59fe010, //        ldr     lr, [pc, #16]   ; 0x3c = sc[15] --> restore lr

0xe59ff010, //        ldr     pc, [pc, #16]   ; 0x40 = sc[16] --> restore pc, resuming the interrupted code

0xe1a00000, //        nop                     slot: saved r0

0xe1a00000, //        nop                     slot: saved r1

0xe1a00000, //        nop                     slot: saved r2

0xe1a00000, //        nop                     slot: saved r3

0xe1a00000, //        nop                     slot: saved lr

0xe1a00000, //        nop                     slot: saved pc

0xe1a00000, //        nop                     slot: saved sp

0xe1a00000, //        nop                     slot: addr of libname

0xe1a00000, //        nop                     slot: dlopenaddr

};

// Register block exchanged with PTRACE_GETREGS / PTRACE_SETREGS in main():

// 18 words, indexed through the ARM_* accessor macros below.

struct pt_regs2 {

         long uregs[18];

};

// Accessors into pt_regs2.uregs: r0-r10 are uregs[0..10], followed by

// fp (11), ip (12), sp (13), lr (14), pc (15), cpsr (16) and ORIG_r0 (17).

#define ARM_cpsr        uregs[16]

#define ARM_pc          uregs[15]

#define ARM_lr          uregs[14]

#define ARM_sp          uregs[13]

// ip is register 12, not the program counter (that is ARM_pc).

#define ARM_ip          uregs[12]

#define ARM_fp          uregs[11]

#define ARM_r10         uregs[10]

#define ARM_r9          uregs[9]

#define ARM_r8          uregs[8]

#define ARM_r7          uregs[7]

#define ARM_r6          uregs[6]

#define ARM_r5          uregs[5]

#define ARM_r4          uregs[4]

#define ARM_r3          uregs[3]

#define ARM_r2          uregs[2]

#define ARM_r1          uregs[1]

#define ARM_r0          uregs[0]

// NOTE(review): presumably the r0 value at syscall entry, as in the kernel's pt_regs - confirm.

#define ARM_ORIG_r0     uregs[17]

// Usage text printed by main() on bad arguments; %s is the program name (argv[0]).

// Injecting a shared library on Android only works with root privileges.

#define HELPSTR "error usage: %s -p PID -l LIBNAME [-d (debug on)] [-z (zygote)] [-m (no mprotect)] [-s (appname)] [-Z (trace count)] [-D (debug level)]\n"

// Entry point of the hijack injection tool.
//
// Options (see HELPSTR): -p PID and -l LIBNAME are mandatory; -d / -D set the
// debug level, -m skips the mprotect() call, -z / -Z set the number of
// syscall-tracing rounds, -s APPNAME follows forks of PID until a child whose
// command line equals APPNAME appears and injects into that child instead.
//
// Steps:
//   1. resolve mprotect() (unless -m) and dlopen() in the target process;
//   2. attach with ptrace and save the target's registers into sc[];
//   3. write the library path and the sc[] stub below the target's sp;
//   4. point the target at mprotect() with lr = stub (or directly at the stub
//      with -m), store the registers and detach so the target runs the stub.
//
// Returns 0 after detaching; exits with status 1 on any failure after argument
// parsing, and prints HELPSTR and exits with status 0 on bad arguments.
int main(int argc, char *argv[])
{
    pid_t pid = 0;
    struct pt_regs2 regs;
    unsigned long dlopenaddr = 0, mprotectaddr = 0, codeaddr, libaddr;
    int fd = 0;
    int n = 0;          // length of the library path in 4-byte words (0: no -l given)
    char buf[32];
    char *arg = NULL;   // zero-padded, word-aligned copy of the library path
    int opt;
    char *appname = 0;

    // Parse the command line.
    while ((opt = getopt(argc, argv, "p:l:dzms:Z:D:")) != -1) {
        switch (opt) {
            case 'p':
                // pid of the process to inject into
                pid = strtol(optarg, NULL, 0);
                break;
            case 'Z':
                // trace count
                zygote = strtol(optarg, NULL, 0);
                break;
            case 'D':
                debug = strtol(optarg, NULL, 0);
                break;
            case 'l': {
                // Path of the library to inject. It is written to the target
                // word by word, so round the length (including the NUL) up to
                // whole 4-byte words and zero-pad the tail. Copy only the
                // string itself: optarg is not guaranteed to extend to n*4 bytes.
                size_t len = strlen(optarg) + 1;

                n = (int)((len + 3) / 4);
                free(arg);
                arg = calloc((size_t)n, sizeof(unsigned long));
                if (arg == NULL) {
                    printf("cannot allocate memory for library name, error!\n");
                    exit(1);
                }
                memcpy(arg, optarg, len);
                break;
            }
            case 'm':
                nomprotect = 1;
                break;
            case 'd':
                debug = 1;
                break;
            case 'z':
                zygote = 1;
                break;
            case 's':
                zygote = 1;
                // name of the child process (of PID) to inject into
                appname = strdup(optarg);
                break;
            default:
                fprintf(stderr, HELPSTR, argv[0]);
                exit(0);
                break;
        }
    }

    // Both a pid and a library path are required.
    if (pid == 0 || n == 0) {
        fprintf(stderr, HELPSTR, argv[0]);
        exit(0);
    }

    // Address of mprotect() inside the target process.
    if (!nomprotect) {
        if (0 > find_name(pid, "mprotect", &mprotectaddr)) {
            printf("can't find address of mprotect(), error!\n");
            exit(1);
        }
        if (debug)
            printf("mprotect: 0x%lx\n", mprotectaddr);
    }

    // Address of dlopen() in this process, looked up through libdl.so.
    void *ldl = dlopen("libdl.so", RTLD_LAZY);
    if (ldl) {
        dlopenaddr = (unsigned long)dlsym(ldl, "dlopen");
        dlclose(ldl);
    }
    if (dlopenaddr == 0) {
        // Without this check an uninitialised address would be injected.
        printf("can't find address of dlopen(), error!\n");
        exit(1);
    }

    unsigned long int lkaddr = 0;
    unsigned long int lkaddr2 = 0;

    // Linker base address in this process ...
    if (0 > find_linker(getpid(), &lkaddr)) {
        printf("can't find linker of own process, error!\n");
        exit(1);
    }
    // ... and in the target process.
    if (0 > find_linker(pid, &lkaddr2)) {
        printf("can't find linker of process %d, error!\n", pid);
        exit(1);
    }

    // dlopen() in the target = target linker base + offset of dlopen() from
    // the linker base in this process.
    dlopenaddr = lkaddr2 + (dlopenaddr - lkaddr);
    if (debug)
        printf("dlopen: 0x%lx\n", dlopenaddr);

    // Attach to the target process and wait until it has stopped.
    if (0 > ptrace(PTRACE_ATTACH, pid, 0, 0)) {
        printf("cannot attach to %d, error!\n", pid);
        exit(1);
    }
    waitpid(pid, NULL, 0);

    // -s APPNAME: follow forks of the target until a child whose command line
    // equals APPNAME shows up, then continue the injection with that child.
    if (appname) {
        // PTRACE_O_TRACEFORK: the target stops at its next fork() and the new
        // child is traced automatically; the child's pid is retrieved with
        // PTRACE_GETEVENTMSG.
        if (ptrace(PTRACE_SETOPTIONS, pid, (void*)1, (void*)(PTRACE_O_TRACEFORK))) {
            printf("FATAL ERROR: ptrace(PTRACE_SETOPTIONS, ...)");
            return -1;
        }
        // Let the target run again.
        ptrace(PTRACE_CONT, pid, (void*)1, 0);

        int t;
        int status;
        int child_pid = 0;

        // NOTE(review): a stop of the parent that is not a fork event is not
        // resumed by this loop - confirm this cannot stall the parent.
        for (;;) {
            // Wait for any traced process (pid -1), of any kind (__WALL).
            t = waitpid(-1, &status, __WALL|WUNTRACED);
            if (t < 0) {
                // No tracee left to wait for: status is meaningless and the
                // loop would otherwise spin forever.
                printf("waitpid failed while waiting for %s, error!\n", appname);
                exit(1);
            }

            // A stop of the most recently forked child: compare its command
            // line with the requested application name.
            if (t != 0 && t == child_pid) {
                if (debug > 1)
                    printf(".");

                char fname[256];
                snprintf(fname, sizeof(fname), "/proc/%d/cmdline", child_pid);
                int fp = open(fname, O_RDONLY);
                if (fp < 0) {
                    // Let the child run to its next syscall stop and retry.
                    ptrace(PTRACE_SYSCALL, child_pid, 0, 0);
                    continue;
                }
                // read() does not terminate the buffer; do it here so that
                // strcmp() never runs past the bytes actually read.
                ssize_t got = read(fp, fname, sizeof(fname) - 1);
                close(fp);
                fname[got > 0 ? got : 0] = '\0';

                if (strcmp(fname, appname) == 0) {
                    if (debug)
                        printf("zygote -> %s\n", fname);
                    // detach from zygote
                    ptrace(PTRACE_DETACH, pid, 0, (void *)SIGCONT);
                    // now perform on new process
                    pid = child_pid;
                    break;
                }
                else {
                    ptrace(PTRACE_SYSCALL, child_pid, 0, 0);
                    continue;
                }
            }

            if (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGTRAP)) {
                // The event code sits in the bits above the low 16 bits of the
                // status; compare it for equality (a bit test against
                // PTRACE_EVENT_FORK would also match other odd event codes).
                if ((status >> 16) == PTRACE_EVENT_FORK) {
                    // PTRACE_GETEVENTMSG stores an unsigned long.
                    unsigned long msg = 0;

                    if (debug > 1)
                        printf("fork\n");
                    ptrace(PTRACE_GETEVENTMSG, t, 0, &msg);
                    child_pid = (int)msg;
                    if (debug)
                        printf("PID=%d  child=%d\n", t, child_pid);
                    if (debug > 1)
                        printf("continue parent (zygote) PID=%d\n", t);
                    // t is the parent that reported the fork: resume it, and
                    // run the new child to its next syscall stop.
                    ptrace(PTRACE_CONT, t, (void*)1, 0);
                    ptrace(PTRACE_SYSCALL, child_pid, 0, 0);
                }
            }
        }
    }

    // -z / -Z / -s: run `zygote` rounds of syscall tracing so that the target
    // is stopped right after a completed syscall before it is modified.
    if (zygote) {
        int i = 0;

        for (i = 0; i < zygote; i++) {
            // -- zygote fix ---
            // we have to wait until the syscall is completed, IMPORTANT!
            // PTRACE_SYSCALL resumes the target until the next syscall entry
            // or exit, where it stops again.
            ptrace(PTRACE_SYSCALL, pid, 0, 0);
            if (debug > 1)
                printf("/");
            waitpid(pid, NULL, 0);

            if (0 > ptrace(PTRACE_GETREGS, pid, 0, &regs)) {
                printf("cannot read registers of %d, error!\n", pid);
                exit(1);
            }

            // ARM_ip is register 12 (not the program counter). The code treats
            // ip == 0 as "stopped at a syscall entry"; any other value means
            // the stop was not an entry, so advance once more.
            if (regs.ARM_ip != 0) {
                if (debug > 1)
                    printf("not a syscall entry, wait for entry\n");
                ptrace(PTRACE_SYSCALL, pid, 0, 0);
                waitpid(pid, NULL, 0);
            }

            // From the entry stop, run until the syscall has completed.
            ptrace(PTRACE_SYSCALL, pid, 0, 0);
            if (debug > 1)
                printf("\\");
            waitpid(pid, NULL, 0);
            // ---- need to work with zygote --- end ---
        }
    }

    if (debug > 1)
        printf("\n");

    // Make sure /proc/<pid>/mem of the target can be opened for writing. The
    // descriptor is not used for anything else, so close it right away.
    sprintf(buf, "/proc/%d/mem", pid);
    fd = open(buf, O_WRONLY);
    if (0 > fd) {
        printf("cannot open %s, error!\n", buf);
        exit(1);
    }
    close(fd);

    // Snapshot the target's registers.
    if (0 > ptrace(PTRACE_GETREGS, pid, 0, &regs)) {
        printf("cannot read registers of %d, error!\n", pid);
        exit(1);
    }

    // Fill the data words of the stub: the register values to restore after
    // dlopen() returns, and the address of dlopen() itself.
    sc[11] = regs.ARM_r0;
    sc[12] = regs.ARM_r1;
    sc[13] = regs.ARM_r2;
    sc[14] = regs.ARM_r3;
    sc[15] = regs.ARM_lr;
    sc[16] = regs.ARM_pc;
    sc[17] = regs.ARM_sp;
    sc[19] = dlopenaddr;

    if (debug) {
        printf("pc=%lx lr=%lx sp=%lx fp=%lx\n", regs.ARM_pc, regs.ARM_lr, regs.ARM_sp, regs.ARM_fp);
        printf("r0=%lx r1=%lx\n", regs.ARM_r0, regs.ARM_r1);
        printf("r2=%lx r3=%lx\n", regs.ARM_r2, regs.ARM_r3);
    }

    // Layout below the target's current sp (addresses growing upwards):
    //   libaddr  .. libaddr + n*4     library path
    //   codeaddr .. old sp            stub (sc)
    libaddr = regs.ARM_sp - n*4 - sizeof(sc);
    // The stub loads the path address from sc[18].
    sc[18] = libaddr;

    // Legacy fallback: when the stack range is still unknown, take it from
    // argv[3] (start page number, hex) and argv[4] (length). Only touch argv
    // when those entries exist; without a range mprotect() cannot be set up.
    if (stack_start == 0) {
        if (argc > 4) {
            stack_start = (unsigned long int) strtol(argv[3], NULL, 16);
            stack_start = stack_start << 12;
            stack_end = stack_start + strtol(argv[4], NULL, 0);
        } else if (!nomprotect) {
            printf("cannot determine stack range of %d, error!\n", pid);
            exit(1);
        }
    }
    if (debug)
        printf("stack: 0x%x-0x%x leng = %d\n", stack_start, stack_end, stack_end-stack_start);

    // Write the library path into the target at libaddr.
    if (0 > write_mem(pid, (unsigned long*)arg, n, libaddr)) {
        printf("cannot write library name (%s) to stack, error!\n", arg);
        exit(1);
    }

    // Write the stub into the target directly above the path.
    codeaddr = regs.ARM_sp - sizeof(sc);
    if (0 > write_mem(pid, (unsigned long*)&sc, sizeof(sc)/sizeof(long), codeaddr)) {
        printf("cannot write code, error!\n");
        exit(1);
    }

    if (debug)
        printf("executing injection code at 0x%lx\n", codeaddr);

    // Move sp below the injected data so the called code does not overwrite it.
    regs.ARM_sp = regs.ARM_sp - n*4 - sizeof(sc);

    // Arguments for mprotect(stack_start, stack size, RWX).
    regs.ARM_r0 = stack_start; // want to make stack executable
    regs.ARM_r1 = stack_end - stack_start; // stack size
    regs.ARM_r2 = PROT_READ|PROT_WRITE|PROT_EXEC; // protections

    // normal mode, first call mprotect
    if (nomprotect == 0) {
        if (debug)
            printf("calling mprotect\n");
        // mprotect() returns into the stub ...
        regs.ARM_lr = codeaddr; // points to loading and fixing code
        // ... after making the stack readable, writable and executable.
        regs.ARM_pc = mprotectaddr; // execute mprotect()
    } else {
        // no need to execute mprotect on old Android versions:
        // jump straight into the stub
        regs.ARM_pc = codeaddr;
    }

    // Store the prepared registers in the target.
    if (0 > ptrace(PTRACE_SETREGS, pid, 0, &regs)) {
        printf("cannot write registers of %d, error!\n", pid);
        exit(1);
    }

    // Detach: the target resumes with the new registers, i.e. it runs
    // mprotect() (unless -m) and then the stub that loads the library.
    ptrace(PTRACE_DETACH, pid, 0, (void *)SIGCONT);

    if (debug)
        printf("library injection completed!\n");

    free(arg);
    free(appname);

    return 0;
}

编译需要的配置文件 Android.mk

# Copyright (C) 2009 The Android Open Source Project

#

# Licensed under the Apache License, Version 2.0 (the "License");

# you may not use this file except in compliance with the License.

# You may obtain a copy of the License at

#

#      http://www.apache.org/licenses/LICENSE-2.0

#

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS,

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

# See the License for the specific language governing permissions and

# limitations under the License.

#

LOCAL_PATH := $(call my-dir)

include $(CLEAR_VARS)

LOCAL_MODULE    := hijack

LOCAL_SRC_FILES := ../hijack.c

# Compile in ARM instruction mode

LOCAL_ARM_MODE := arm

# Compiler flags: -g

LOCAL_CFLAGS := -g

# Build an executable

include $(BUILD_EXECUTABLE)

hijack注入工具源码的编译和使用方法

# Build hijack with the NDK from its jni directory

cd hijack

cd jni

ndk-build

cd ..

# Copy the built binary to the device and make it executable

adb push libs/armeabi/hijack /data/local/tmp/

adb shell chmod 0777 /data/local/tmp/hijack

# Open a shell on the device and switch to root

adb shell

su

cd /data/local/tmp

# Create (or truncate) the log file of the example library

>/data/local/tmp/adbi_example.log

# GET PID from com.android.phone

# Inject libexample.so into that process, with debug output enabled

./hijack -d -p PID -l /data/local/tmp/libexample.so

cat adbi_example.log

hijack注入工具使用帮助的简要说明

// Injecting a shared library on Android only works with root privileges.

#define HELPSTR "error usage: %s -p PID -l LIBNAME [-d (debug on)] [-z (zygote)] [-m (no mprotect)] [-s (appname)] [-Z (trace count)] [-D (debug level)]\n"

参考链接：

Android平台下hook框架adbi的研究（上）

android hook 框架 libinject2 简介、编译、运行

 Android利用ptrace实现Hook API