Linux内核机制—smp_hotplug

基于Linux-5.10

一、简介

1. 只是一个创建per-cpu线程执行用户提供的回调的机制。

2. 内核中已存在的注册

/*
 * smp_hotplug_thread descriptor for the idle-injection threads.
 * The "%u" in thread_comm is filled with the CPU number when the
 * per-CPU thread is created.
 */
static struct smp_hotplug_thread idle_inject_threads = { //drivers/powercap/idle_inject.c

    .store = &idle_inject_thread.tsk,

    .setup = idle_inject_setup,

    .thread_fn = idle_inject_fn,

    .thread_comm = "idle_inject/%u",

    .thread_should_run = idle_inject_should_run,

};

early_initcall

    smpboot_register_percpu_thread(&idle_inject_threads);

/*
 * smp_hotplug_thread descriptor for the per-CPU stopper threads
 * ("migration/<cpu>").
 */
static struct smp_hotplug_thread cpu_stop_threads = { //kernel/stop_machine.c

    .store            = &cpu_stopper.thread,

    .thread_should_run    = cpu_stop_should_run,

    .thread_fn        = cpu_stopper_thread,

    .thread_comm        = "migration/%u",

    .create            = cpu_stop_create,

    .park            = cpu_stop_park,

    /* With selfparking set, smpboot_unpark_thread() skips kthread_unpark(). */
    .selfparking        = true,

};

early_initcall

    smpboot_register_percpu_thread(&cpu_stop_threads)

/* smp_hotplug_thread descriptor for the per-CPU RCU threads ("rcuc/<cpu>"). */
static struct smp_hotplug_thread rcu_cpu_thread_spec = { //kernel/rcu/tree.c

    .store            = &rcu_data.rcu_cpu_kthread_task,

    .thread_should_run    = rcu_cpu_kthread_should_run,

    .thread_fn        = rcu_cpu_kthread,

    .thread_comm        = "rcuc/%u", //one thread per CPU

    .setup            = rcu_cpu_kthread_setup,

    .park            = rcu_cpu_kthread_park,

};

early_initcall

    smpboot_register_percpu_thread(&rcu_cpu_thread_spec)

/*
 * smp_hotplug_thread descriptor for the per-CPU softirq threads
 * ("ksoftirqd/<cpu>"). Only the mandatory callbacks are provided.
 */
static struct smp_hotplug_thread softirq_threads = { //kernel/softirq.c

    .store            = &ksoftirqd,

    .thread_should_run    = ksoftirqd_should_run,

    .thread_fn        = run_ksoftirqd,

    .thread_comm        = "ksoftirqd/%u",

};

early_initcall

    smpboot_register_percpu_thread(&softirq_threads)

/* smp_hotplug_thread descriptor for the per-CPU hotplug threads ("cpuhp/<cpu>"). */
static struct smp_hotplug_thread cpuhp_threads = { //kernel/cpu.c

    .store            = &cpuhp_state.thread,

    .create            = &cpuhp_create,

    .thread_should_run    = cpuhp_should_run,

    .thread_fn        = cpuhp_thread_fun,

    .thread_comm        = "cpuhp/%u",

    /* With selfparking set, smpboot_unpark_thread() skips kthread_unpark(). */
    .selfparking        = true,

};

kernel_init_freeable //smp_init() 在 do_basic_setup() 之前调用，但晚于 early_initcall（early_initcall 由 do_pre_smp_initcalls() 在 smp_init() 之前执行）

    smp_init

        smpboot_register_percpu_thread(&cpuhp_threads)

都是通过 smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) 函数在内核启动早期调用的。注册线程的函数体都是smpboot_thread_fn()。

二、相关数据结构

1. struct smp_hotplug_thread

/*
 * Descriptor a user fills in to get one kernel thread per CPU that runs
 * the user's callbacks from smpboot_thread_fn().
 */
struct smp_hotplug_thread { //include/linux/smpboot.h

    /* Per-CPU slot that receives the task_struct of each CPU's thread. */
    struct task_struct    * __percpu *store;

    /* Links the descriptor onto the global hotplug_threads list on registration. */
    struct list_head    list;

    /* Asked on each loop iteration, with preemption disabled, whether thread_fn should run. */
    int                    (*thread_should_run)(unsigned int cpu);

    /* Main work callback; invoked with preemption enabled. */
    void                (*thread_fn)(unsigned int cpu);

    /*
     * Optional. Called by __smpboot_create_thread() in the registering
     * context (not by the new thread) once the new thread is parked.
     */
    void                (*create)(unsigned int cpu);

    /* Optional. Called by the thread itself the first time it runs. */
    void                (*setup)(unsigned int cpu);

    /* Optional. Called when the thread is stopped, if it ever got past setup. */
    void                (*cleanup)(unsigned int cpu, bool online);

    /* Optional. Called by an active thread right before it parks. */
    void                (*park)(unsigned int cpu);

    /* Optional. Called by the thread when it resumes after having been parked. */
    void                (*unpark)(unsigned int cpu);

    /*
     * When true, smpboot_unpark_thread() does not unpark the thread,
     * so it stays parked until the user unparks it.
     */
    bool                selfparking;

    /* Name format handed to kthread_create_on_cpu(), e.g. "ksoftirqd/%u". */
    const char            *thread_comm;

};

CPU hotplug 相关的描述符。

store: per-cpu变量，指向每个 cpu 上的 task_struct 结构。smp hotplug thread 在注册时会为每个CPU注册一个内核线程。
list: 在初始化时通过它挂在全局 hotplug_threads 链表上，方便 core 进行管理。
thread_should_run: 检查线程是否应该运行的回调函数，在禁用抢占的情况下调用。
thread_fn: 关联的功能函数，这个是主要的回调，是开着抢占调用的。
create: 可选的设置回调函数，在创建线程时由 __smpboot_create_thread() 调用（不是在新建线程的上下文中调用，而是在调用 smpboot_register_percpu_thread() 的注册者上下文中、等新线程进入 parked 状态之后调用）
setup: 可选的设置回调函数，当线程第一次运行时调用，可用于设置线程属性。
cleanup: 可选的清理回调函数，当线程应该停止时调用（模块退出）
park: 可选的 park 回调函数，当线程被 park 时调用（cpu offline）
unpark: 可选的 unpark 回调函数，当线程被 unpark 时调用（cpu online）
selfparking: 若初始化为true，则注册时不会自动对线程执行unpark，创建完线程后线程保持parked状态，需要使用者自己unpark；为false则注册时会自动unpark线程。
thread_comm: 创建的per-cpu线程的名称中基础的部分。

2. struct smpboot_thread_data

/*
 * Per-thread bookkeeping allocated in __smpboot_create_thread() and passed
 * to smpboot_thread_fn() as its data argument.
 */
struct smpboot_thread_data {

    /* CPU this thread was created for and is expected to run on. */
    unsigned int            cpu;

    /* Lifecycle state compared against the HP_THREAD_* values (NONE / ACTIVE / PARKED). */
    unsigned int            status;

    /* Back-pointer to the descriptor registered by the user. */
    struct smp_hotplug_thread    *ht;

};

是一个辅助结构。

cpu: 判断是哪个CPU的，也就是在哪个CPU上执行。
status: per-cpu的hotplug线程的状态。
ht: 指向用户注册的hotplug结构。

三、注册流程

一般内核模块会先初始化一个 smp_hotplug_thread 结构，然后通常在 early_initcall() 或 smp_init() 等内核启动早期阶段调用 smpboot_register_percpu_thread() 进行注册。下面使用 stop_machine.c 中的注册进行举例：

/*
 * Boot-time registration of the per-CPU stopper threads (abridged excerpt
 * of kernel/stop_machine.c).
 *
 * Returns 0. The function is declared to return int and is hooked up via
 * early_initcall(), so it must not fall off the end without a value.
 * NOTE(review): the registration result is ignored in this abridged
 * excerpt — check the upstream source for how failure is handled.
 */
static int __init cpu_stop_init(void)

{

    smpboot_register_percpu_thread(&cpu_stop_threads);

    return 0;

}

early_initcall(cpu_stop_init);

1. 注册函数执行流程：

/*
 * Register a set of per-CPU hotplug threads (abridged; "..." marks code
 * the article elided). For every online CPU the thread is created and,
 * unless the descriptor is self-parking, unparked. Finally the descriptor
 * is linked onto the global hotplug_threads list.
 */
int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) //smpboot.c

{

    ...

    for_each_online_cpu(cpu) {

        /* Creates the thread for this CPU and leaves it parked. */
        __smpboot_create_thread(plug_thread, cpu);

        /* Does nothing when plug_thread->selfparking is set. */
        smpboot_unpark_thread(plug_thread, cpu);

    }

    list_add(&plug_thread->list, &hotplug_threads);

}

1.1. __smpboot_create_thread 函数：

/*
 * Create the hotplug thread of @ht for @cpu, park it, and publish it in
 * the per-CPU ht->store slot (abridged excerpt of kernel/smpboot.c).
 * Returns 0 in this excerpt.
 */
static int __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu) //smpboot.c

{

    struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);

    struct smpboot_thread_data *td;

    /* Zero-filled bookkeeping, allocated on the NUMA node of @cpu. */
    td = kzalloc_node(sizeof(*td), GFP_KERNEL, cpu_to_node(cpu));

    td->cpu = cpu;

    td->ht = ht;

    /*
     * The kernel thread created here runs smpboot_thread_fn() with td as
     * its argument; td->ht points at the descriptor the user registered.
     */

    tsk = kthread_create_on_cpu(smpboot_thread_fn, td, cpu, ht->thread_comm);

    /* Per the original note: sets KTHREAD_IS_PER_CPU in kthread->flags. */

    kthread_set_per_cpu(tsk, cpu);

    /*
     * Per the original note (kthread_park() itself is not shown here):
     * sets KTHREAD_SHOULD_PARK in the thread's kthread->flags so that tsk
     * ends up in TASK_PARKED; if tsk != current it is woken first so it
     * can go and park itself.
     */

    kthread_park(tsk);

    /* The task created for each CPU is recorded in the per-CPU store slot. */

    *per_cpu_ptr(ht->store, cpu) = tsk;

    /*
     * Invoke the optional create callback. It runs here, in the context
     * of whoever is registering the threads, not in the new thread, and
     * only after waiting on the new thread with TASK_PARKED.
     */

    if (ht->create) {

        wait_task_inactive(tsk, TASK_PARKED);

        ht->create(cpu);

    }

    return 0;

}

struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),

                      void *data, unsigned int cpu, const char *namefmt)

{

    /* 在指定的cpu上注册一个CFS 120优先级的内核线程，线程函数体为 smpboot_thread_fn() */

    struct task_struct p = kthread_create_on_node(threadfn, data, cpu_to_node(cpu), namefmt, cpu);

    /*

     * 将创建的线程绑定到这个cpu上，这里会同时设置 p->flags |= PF_NO_SETAFFINITY

     * 标志位，不允许用户空间设置亲和性。

     */

    kthread_bind(p, cpu);

    /* 翻译：CPU 热插拔需要在 unparking 线程时再次绑定 */

    to_kthread(p)->cpu = cpu;

    return p;

}

1.2 smpboot_unpark_thread 函数：

/*
 * Unpark the hotplug thread of @ht on @cpu, unless the descriptor is
 * marked self-parking, in which case the thread is left parked.
 */
static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cpu)

{

    struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);

    /* Only reached when the user did NOT set selfparking = true. */

    if (!ht->selfparking)

        kthread_unpark(tsk);

}

/*
 * Unpark kthread @k: re-bind it to its CPU if it is a per-CPU thread,
 * clear the park request, and wake it out of TASK_PARKED.
 */
void kthread_unpark(struct task_struct *k)

{

    struct kthread *kthread = to_kthread(k);

    /*
     * Newly created kthread was parked when the CPU was offline.
     * The binding was lost and we need to set it again.
     */

    if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))

        __kthread_bind(k, kthread->cpu, TASK_PARKED);

    clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);

    /* Wake the task if it is in the parked state. */

    wake_up_state(k, TASK_PARKED);

}

2. 总结

可以看到，所有注册 smp_hotplug_thread 结构的模块，响应函数都是 smpboot_thread_fn()，默认是CFS 120优先级。

若 smp_hotplug_thread::selfparking = false，则创建完线程后会自动对线程进行unpark操作，创建出来的线程是unparked状态。
为true则创建出来的线程是parked的状态，使用者还需要自己进行unpark。

若提供了 smp_hotplug_thread::create 回调，则在创建过程中就会调用，此时还处于内核启动早期（early_initcall() 或 smp_init()）阶段。

线程创建时已经和单个CPU绑定了，且设置了 p->flags |= PF_NO_SETAFFINITY，不允许用户空间设置亲和性了。

四、实现逻辑

1. smpboot_thread_fn() 实现

既然创建的per-cpu的内核线程执行的是 smpboot_thread_fn()，这个函数是per-cpu的hotplug线程的死循环函数，在它里面会
检查线程是否需要stop、park、unpark、setup、cleanup 并调用用户注册的对应的回调函数。其目前只能返回0。下面看其实现。

/*
 * Main loop of every per-CPU hotplug thread.
 *
 * @data: the struct smpboot_thread_data set up when the thread was created.
 *
 * Each iteration handles, in this order: a stop request, a park request,
 * the one-time setup / post-park unpark transitions, and finally the
 * user's thread_should_run()/thread_fn() pair. Returns 0 once a stop
 * request has been seen.
 */
static int smpboot_thread_fn(void *data) //smpboot.c

{

    struct smpboot_thread_data *td = data;

    struct smp_hotplug_thread *ht = td->ht;

    while (1) {

        /* Mark ourselves sleeping before checking conditions. */
        set_current_state(TASK_INTERRUPTIBLE);

        preempt_disable();

        /*
         * Per the original note: checks KTHREAD_SHOULD_STOP in the kthread
         * flags, i.e. whether this kthread should return now. When someone
         * calls kthread_stop() on this kthread it is woken and this
         * returns true; the thread should then return, and its return
         * value is handed to kthread_stop().
         */

        if (kthread_should_stop()) {

            __set_current_state(TASK_RUNNING);

            preempt_enable();

            /* cleanup must mirror setup */

            if (ht->cleanup && td->status != HP_THREAD_NONE)

                ht->cleanup(td->cpu, cpu_online(td->cpu));

            kfree(td);

            return 0;

        }

        /*
         * Per the original note: checks KTHREAD_SHOULD_PARK in the kthread
         * flags, i.e. whether this kthread should park now. The state is
         * set back to running first, then the park() callback is invoked.
         */

        if (kthread_should_park()) {

            __set_current_state(TASK_RUNNING);

            preempt_enable();

            /* Only a thread that is currently active gets the park() callback. */
            if (ht->park && td->status == HP_THREAD_ACTIVE) {

                BUG_ON(td->cpu != smp_processor_id());

                ht->park(td->cpu);

                td->status = HP_THREAD_PARKED;

            }

            /*
             * Per the original note: sets current->state = TASK_PARKED,
             * does complete(&self->parked), then schedules away.
             */

            kthread_parkme();

            /* We might have been woken for stop */

            continue;

        }

        /* ---- From here on: neither a stop nor a park request is pending ---- */

        BUG_ON(td->cpu != smp_processor_id());

        /* Check for state change setup */

        switch (td->status) {

        /* First run of this thread: call setup() once, then become active. */
        case HP_THREAD_NONE:

            __set_current_state(TASK_RUNNING);

            preempt_enable();

            if (ht->setup)

                ht->setup(td->cpu);

            td->status = HP_THREAD_ACTIVE;

            continue;

        /* Resuming after a park: call unpark(), then become active again. */
        case HP_THREAD_PARKED:

            __set_current_state(TASK_RUNNING);

            preempt_enable();

            if (ht->unpark)

                ht->unpark(td->cpu);

            td->status = HP_THREAD_ACTIVE;

            continue;

        }

        /*
         * Ask the user (preemption still disabled) whether there is work.
         * If not, schedule away; otherwise call the ht->thread_fn()
         * callback with preemption enabled.
         */

        if (!ht->thread_should_run(td->cpu)) {

            preempt_enable_no_resched();

            schedule();

        } else {

            __set_current_state(TASK_RUNNING);

            preempt_enable();

            ht->thread_fn(td->cpu); //e.g. cpuhp_thread_fun

        }

    }

}

这个函数是个单纯的死循环执行逻辑，没有持任何锁，只有 thread_should_run() 回调是在关抢占的情况下调用的，其它回调（setup、park、unpark、cleanup、thread_fn）都是开着抢占调用的。

2. 其调用路径

作为上面注册的 per-cpu 内核线程的执行实体，是 smpboot_thread_fn() 唯一的调用路径，没有其它调用路径。

五、使用方法

既然内核线程函数体没有其它任何调用路径，那么只能通过 smp_hotplug_thread::store 保存的 task_struct 结构来唤醒线程，然后再在 smp_hotplug_thread 结构的回调函数中做文章。这也充分体现了 Linux 内核只提供机制、不提供策略的思想！

六、总结

注册 smp_hotplug_thread 结构，内核只是提供了为每个CPU都创建一个线程执行其回调的机制，线程函数体是 smpboot_thread_fn()，此函数没有任何其它调用路径，因此使用者只能通过唤醒+实现回调来实现自己的功能，执行完回调后若 thread_should_run() 返回假，线程会自动休眠。