11142d810STejun Heo /* 21142d810STejun Heo * kernel/stop_machine.c 31142d810STejun Heo * 41142d810STejun Heo * Copyright (C) 2008, 2005 IBM Corporation. 51142d810STejun Heo * Copyright (C) 2008, 2005 Rusty Russell rusty@rustcorp.com.au 61142d810STejun Heo * Copyright (C) 2010 SUSE Linux Products GmbH 71142d810STejun Heo * Copyright (C) 2010 Tejun Heo <tj@kernel.org> 81142d810STejun Heo * 91142d810STejun Heo * This file is released under the GPLv2 and any later version. 10e5582ca2SRusty Russell */ 111142d810STejun Heo #include <linux/completion.h> 121da177e4SLinus Torvalds #include <linux/cpu.h> 131142d810STejun Heo #include <linux/init.h> 14ee527cd3SPrarit Bhargava #include <linux/kthread.h> 159984de1aSPaul Gortmaker #include <linux/export.h> 161142d810STejun Heo #include <linux/percpu.h> 17ee527cd3SPrarit Bhargava #include <linux/sched.h> 18ee527cd3SPrarit Bhargava #include <linux/stop_machine.h> 19a12bb444SBenjamin Herrenschmidt #include <linux/interrupt.h> 201142d810STejun Heo #include <linux/kallsyms.h> 21a12bb444SBenjamin Herrenschmidt 2260063497SArun Sharma #include <linux/atomic.h> 231142d810STejun Heo 241142d810STejun Heo /* 251142d810STejun Heo * Structure to determine completion condition and record errors. May 261142d810STejun Heo * be shared by works on different cpus. 271142d810STejun Heo */ 281142d810STejun Heo struct cpu_stop_done { 291142d810STejun Heo atomic_t nr_todo; /* nr left to execute */ 301142d810STejun Heo bool executed; /* actually executed? */ 311142d810STejun Heo int ret; /* collected return value */ 321142d810STejun Heo struct completion completion; /* fired if nr_todo reaches 0 */ 331142d810STejun Heo }; 341142d810STejun Heo 351142d810STejun Heo /* the actual stopper, one per every possible cpu, enabled on online cpus */ 361142d810STejun Heo struct cpu_stopper { 371142d810STejun Heo spinlock_t lock; 38878ae127SRichard Kennedy bool enabled; /* is this stopper enabled? */ 391142d810STejun Heo struct list_head works; /* list of pending works */ 401142d810STejun Heo }; 411142d810STejun Heo 421142d810STejun Heo static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); 43*860a0ffaSThomas Gleixner static DEFINE_PER_CPU(struct task_struct *, cpu_stopper_task); 44f445027eSJeremy Fitzhardinge static bool stop_machine_initialized = false; 451142d810STejun Heo 461142d810STejun Heo static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo) 471142d810STejun Heo { 481142d810STejun Heo memset(done, 0, sizeof(*done)); 491142d810STejun Heo atomic_set(&done->nr_todo, nr_todo); 501142d810STejun Heo init_completion(&done->completion); 511142d810STejun Heo } 521142d810STejun Heo 531142d810STejun Heo /* signal completion unless @done is NULL */ 541142d810STejun Heo static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed) 551142d810STejun Heo { 561142d810STejun Heo if (done) { 571142d810STejun Heo if (executed) 581142d810STejun Heo done->executed = true; 591142d810STejun Heo if (atomic_dec_and_test(&done->nr_todo)) 601142d810STejun Heo complete(&done->completion); 611142d810STejun Heo } 621142d810STejun Heo } 631142d810STejun Heo 641142d810STejun Heo /* queue @work to @stopper. if offline, @work is completed immediately */ 65*860a0ffaSThomas Gleixner static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) 661142d810STejun Heo { 67*860a0ffaSThomas Gleixner struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); 68*860a0ffaSThomas Gleixner struct task_struct *p = per_cpu(cpu_stopper_task, cpu); 69*860a0ffaSThomas Gleixner 701142d810STejun Heo unsigned long flags; 711142d810STejun Heo 721142d810STejun Heo spin_lock_irqsave(&stopper->lock, flags); 731142d810STejun Heo 741142d810STejun Heo if (stopper->enabled) { 751142d810STejun Heo list_add_tail(&work->list, &stopper->works); 76*860a0ffaSThomas Gleixner wake_up_process(p); 771142d810STejun Heo } else 781142d810STejun Heo cpu_stop_signal_done(work->done, false); 791142d810STejun Heo 801142d810STejun Heo spin_unlock_irqrestore(&stopper->lock, flags); 811142d810STejun Heo } 821142d810STejun Heo 831142d810STejun Heo /** 841142d810STejun Heo * stop_one_cpu - stop a cpu 851142d810STejun Heo * @cpu: cpu to stop 861142d810STejun Heo * @fn: function to execute 871142d810STejun Heo * @arg: argument to @fn 881142d810STejun Heo * 891142d810STejun Heo * Execute @fn(@arg) on @cpu. @fn is run in a process context with 901142d810STejun Heo * the highest priority preempting any task on the cpu and 911142d810STejun Heo * monopolizing it. This function returns after the execution is 921142d810STejun Heo * complete. 931142d810STejun Heo * 941142d810STejun Heo * This function doesn't guarantee @cpu stays online till @fn 951142d810STejun Heo * completes. If @cpu goes down in the middle, execution may happen 961142d810STejun Heo * partially or fully on different cpus. @fn should either be ready 971142d810STejun Heo * for that or the caller should ensure that @cpu stays online until 981142d810STejun Heo * this function completes. 991142d810STejun Heo * 1001142d810STejun Heo * CONTEXT: 1011142d810STejun Heo * Might sleep. 1021142d810STejun Heo * 1031142d810STejun Heo * RETURNS: 1041142d810STejun Heo * -ENOENT if @fn(@arg) was not executed because @cpu was offline; 1051142d810STejun Heo * otherwise, the return value of @fn. 1061142d810STejun Heo */ 1071142d810STejun Heo int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg) 1081142d810STejun Heo { 1091142d810STejun Heo struct cpu_stop_done done; 1101142d810STejun Heo struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done }; 1111142d810STejun Heo 1121142d810STejun Heo cpu_stop_init_done(&done, 1); 113*860a0ffaSThomas Gleixner cpu_stop_queue_work(cpu, &work); 1141142d810STejun Heo wait_for_completion(&done.completion); 1151142d810STejun Heo return done.executed ? done.ret : -ENOENT; 1161142d810STejun Heo } 1171142d810STejun Heo 1181142d810STejun Heo /** 1191142d810STejun Heo * stop_one_cpu_nowait - stop a cpu but don't wait for completion 1201142d810STejun Heo * @cpu: cpu to stop 1211142d810STejun Heo * @fn: function to execute 1221142d810STejun Heo * @arg: argument to @fn 1231142d810STejun Heo * 1241142d810STejun Heo * Similar to stop_one_cpu() but doesn't wait for completion. The 1251142d810STejun Heo * caller is responsible for ensuring @work_buf is currently unused 1261142d810STejun Heo * and will remain untouched until stopper starts executing @fn. 1271142d810STejun Heo * 1281142d810STejun Heo * CONTEXT: 1291142d810STejun Heo * Don't care. 1301142d810STejun Heo */ 1311142d810STejun Heo void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, 1321142d810STejun Heo struct cpu_stop_work *work_buf) 1331142d810STejun Heo { 1341142d810STejun Heo *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, }; 135*860a0ffaSThomas Gleixner cpu_stop_queue_work(cpu, work_buf); 1361142d810STejun Heo } 1371142d810STejun Heo 1381142d810STejun Heo /* static data for stop_cpus */ 139192d8857SSuresh Siddha static DEFINE_MUTEX(stop_cpus_mutex); 1401142d810STejun Heo static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work); 1411142d810STejun Heo 142fd7355baSTejun Heo static void queue_stop_cpus_work(const struct cpumask *cpumask, 143fd7355baSTejun Heo cpu_stop_fn_t fn, void *arg, 144fd7355baSTejun Heo struct cpu_stop_done *done) 1451142d810STejun Heo { 1461142d810STejun Heo struct cpu_stop_work *work; 1471142d810STejun Heo unsigned int cpu; 1481142d810STejun Heo 1491142d810STejun Heo /* initialize works and done */ 1501142d810STejun Heo for_each_cpu(cpu, cpumask) { 1511142d810STejun Heo work = &per_cpu(stop_cpus_work, cpu); 1521142d810STejun Heo work->fn = fn; 1531142d810STejun Heo work->arg = arg; 154fd7355baSTejun Heo work->done = done; 1551142d810STejun Heo } 1561142d810STejun Heo 1571142d810STejun Heo /* 1581142d810STejun Heo * Disable preemption while queueing to avoid getting 1591142d810STejun Heo * preempted by a stopper which might wait for other stoppers 1601142d810STejun Heo * to enter @fn which can lead to deadlock. 1611142d810STejun Heo */ 1621142d810STejun Heo preempt_disable(); 1631142d810STejun Heo for_each_cpu(cpu, cpumask) 164*860a0ffaSThomas Gleixner cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu)); 1651142d810STejun Heo preempt_enable(); 166fd7355baSTejun Heo } 1671142d810STejun Heo 168fd7355baSTejun Heo static int __stop_cpus(const struct cpumask *cpumask, 169fd7355baSTejun Heo cpu_stop_fn_t fn, void *arg) 170fd7355baSTejun Heo { 171fd7355baSTejun Heo struct cpu_stop_done done; 172fd7355baSTejun Heo 173fd7355baSTejun Heo cpu_stop_init_done(&done, cpumask_weight(cpumask)); 174fd7355baSTejun Heo queue_stop_cpus_work(cpumask, fn, arg, &done); 1751142d810STejun Heo wait_for_completion(&done.completion); 1761142d810STejun Heo return done.executed ? done.ret : -ENOENT; 1771142d810STejun Heo } 1781142d810STejun Heo 1791142d810STejun Heo /** 1801142d810STejun Heo * stop_cpus - stop multiple cpus 1811142d810STejun Heo * @cpumask: cpus to stop 1821142d810STejun Heo * @fn: function to execute 1831142d810STejun Heo * @arg: argument to @fn 1841142d810STejun Heo * 1851142d810STejun Heo * Execute @fn(@arg) on online cpus in @cpumask. On each target cpu, 1861142d810STejun Heo * @fn is run in a process context with the highest priority 1871142d810STejun Heo * preempting any task on the cpu and monopolizing it. This function 1881142d810STejun Heo * returns after all executions are complete. 1891142d810STejun Heo * 1901142d810STejun Heo * This function doesn't guarantee the cpus in @cpumask stay online 1911142d810STejun Heo * till @fn completes. If some cpus go down in the middle, execution 1921142d810STejun Heo * on the cpu may happen partially or fully on different cpus. @fn 1931142d810STejun Heo * should either be ready for that or the caller should ensure that 1941142d810STejun Heo * the cpus stay online until this function completes. 1951142d810STejun Heo * 1961142d810STejun Heo * All stop_cpus() calls are serialized making it safe for @fn to wait 1971142d810STejun Heo * for all cpus to start executing it. 1981142d810STejun Heo * 1991142d810STejun Heo * CONTEXT: 2001142d810STejun Heo * Might sleep. 2011142d810STejun Heo * 2021142d810STejun Heo * RETURNS: 2031142d810STejun Heo * -ENOENT if @fn(@arg) was not executed at all because all cpus in 2041142d810STejun Heo * @cpumask were offline; otherwise, 0 if all executions of @fn 2051142d810STejun Heo * returned 0, any non zero return value if any returned non zero. 2061142d810STejun Heo */ 2071142d810STejun Heo int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) 2081142d810STejun Heo { 2091142d810STejun Heo int ret; 2101142d810STejun Heo 2111142d810STejun Heo /* static works are used, process one request at a time */ 2121142d810STejun Heo mutex_lock(&stop_cpus_mutex); 2131142d810STejun Heo ret = __stop_cpus(cpumask, fn, arg); 2141142d810STejun Heo mutex_unlock(&stop_cpus_mutex); 2151142d810STejun Heo return ret; 2161142d810STejun Heo } 2171142d810STejun Heo 2181142d810STejun Heo /** 2191142d810STejun Heo * try_stop_cpus - try to stop multiple cpus 2201142d810STejun Heo * @cpumask: cpus to stop 2211142d810STejun Heo * @fn: function to execute 2221142d810STejun Heo * @arg: argument to @fn 2231142d810STejun Heo * 2241142d810STejun Heo * Identical to stop_cpus() except that it fails with -EAGAIN if 2251142d810STejun Heo * someone else is already using the facility. 2261142d810STejun Heo * 2271142d810STejun Heo * CONTEXT: 2281142d810STejun Heo * Might sleep. 2291142d810STejun Heo * 2301142d810STejun Heo * RETURNS: 2311142d810STejun Heo * -EAGAIN if someone else is already stopping cpus, -ENOENT if 2321142d810STejun Heo * @fn(@arg) was not executed at all because all cpus in @cpumask were 2331142d810STejun Heo * offline; otherwise, 0 if all executions of @fn returned 0, any non 2341142d810STejun Heo * zero return value if any returned non zero. 2351142d810STejun Heo */ 2361142d810STejun Heo int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) 2371142d810STejun Heo { 2381142d810STejun Heo int ret; 2391142d810STejun Heo 2401142d810STejun Heo /* static works are used, process one request at a time */ 2411142d810STejun Heo if (!mutex_trylock(&stop_cpus_mutex)) 2421142d810STejun Heo return -EAGAIN; 2431142d810STejun Heo ret = __stop_cpus(cpumask, fn, arg); 2441142d810STejun Heo mutex_unlock(&stop_cpus_mutex); 2451142d810STejun Heo return ret; 2461142d810STejun Heo } 2471142d810STejun Heo 2481142d810STejun Heo static int cpu_stopper_thread(void *data) 2491142d810STejun Heo { 2501142d810STejun Heo struct cpu_stopper *stopper = data; 2511142d810STejun Heo struct cpu_stop_work *work; 2521142d810STejun Heo int ret; 2531142d810STejun Heo 2541142d810STejun Heo repeat: 2551142d810STejun Heo set_current_state(TASK_INTERRUPTIBLE); /* mb paired w/ kthread_stop */ 2561142d810STejun Heo 2571142d810STejun Heo if (kthread_should_stop()) { 2581142d810STejun Heo __set_current_state(TASK_RUNNING); 2591142d810STejun Heo return 0; 2601142d810STejun Heo } 2611142d810STejun Heo 2621142d810STejun Heo work = NULL; 2631142d810STejun Heo spin_lock_irq(&stopper->lock); 2641142d810STejun Heo if (!list_empty(&stopper->works)) { 2651142d810STejun Heo work = list_first_entry(&stopper->works, 2661142d810STejun Heo struct cpu_stop_work, list); 2671142d810STejun Heo list_del_init(&work->list); 2681142d810STejun Heo } 2691142d810STejun Heo spin_unlock_irq(&stopper->lock); 2701142d810STejun Heo 2711142d810STejun Heo if (work) { 2721142d810STejun Heo cpu_stop_fn_t fn = work->fn; 2731142d810STejun Heo void *arg = work->arg; 2741142d810STejun Heo struct cpu_stop_done *done = work->done; 275ca51c5a7SRakib Mullick char ksym_buf[KSYM_NAME_LEN] __maybe_unused; 2761142d810STejun Heo 2771142d810STejun Heo __set_current_state(TASK_RUNNING); 2781142d810STejun Heo 2791142d810STejun Heo /* cpu stop callbacks are not allowed to sleep */ 2801142d810STejun Heo preempt_disable(); 2811142d810STejun Heo 2821142d810STejun Heo ret = fn(arg); 2831142d810STejun Heo if (ret) 2841142d810STejun Heo done->ret = ret; 2851142d810STejun Heo 2861142d810STejun Heo /* restore preemption and check it's still balanced */ 2871142d810STejun Heo preempt_enable(); 2881142d810STejun Heo WARN_ONCE(preempt_count(), 2891142d810STejun Heo "cpu_stop: %s(%p) leaked preempt count\n", 2901142d810STejun Heo kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL, 2911142d810STejun Heo ksym_buf), arg); 2921142d810STejun Heo 2931142d810STejun Heo cpu_stop_signal_done(done, true); 2941142d810STejun Heo } else 2951142d810STejun Heo schedule(); 2961142d810STejun Heo 2971142d810STejun Heo goto repeat; 2981142d810STejun Heo } 2991142d810STejun Heo 30034f971f6SPeter Zijlstra extern void sched_set_stop_task(int cpu, struct task_struct *stop); 30134f971f6SPeter Zijlstra 3021142d810STejun Heo /* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */ 3031142d810STejun Heo static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, 3041142d810STejun Heo unsigned long action, void *hcpu) 3051142d810STejun Heo { 3061142d810STejun Heo unsigned int cpu = (unsigned long)hcpu; 3071142d810STejun Heo struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); 308*860a0ffaSThomas Gleixner struct task_struct *p = per_cpu(cpu_stopper_task, cpu); 3091142d810STejun Heo 3101142d810STejun Heo switch (action & ~CPU_TASKS_FROZEN) { 3111142d810STejun Heo case CPU_UP_PREPARE: 312*860a0ffaSThomas Gleixner BUG_ON(p || stopper->enabled || !list_empty(&stopper->works)); 31394dcf29aSEric Dumazet p = kthread_create_on_node(cpu_stopper_thread, 31494dcf29aSEric Dumazet stopper, 31594dcf29aSEric Dumazet cpu_to_node(cpu), 31694dcf29aSEric Dumazet "migration/%d", cpu); 3171142d810STejun Heo if (IS_ERR(p)) 3184ce6494dSAkinobu Mita return notifier_from_errno(PTR_ERR(p)); 3191142d810STejun Heo get_task_struct(p); 32034f971f6SPeter Zijlstra kthread_bind(p, cpu); 32134f971f6SPeter Zijlstra sched_set_stop_task(cpu, p); 322*860a0ffaSThomas Gleixner per_cpu(cpu_stopper_task, cpu) = p; 3231142d810STejun Heo break; 3241142d810STejun Heo 3251142d810STejun Heo case CPU_ONLINE: 3261142d810STejun Heo /* strictly unnecessary, as first user will wake it */ 327*860a0ffaSThomas Gleixner wake_up_process(p); 3281142d810STejun Heo /* mark enabled */ 3291142d810STejun Heo spin_lock_irq(&stopper->lock); 3301142d810STejun Heo stopper->enabled = true; 3311142d810STejun Heo spin_unlock_irq(&stopper->lock); 3321142d810STejun Heo break; 3331142d810STejun Heo 3341142d810STejun Heo #ifdef CONFIG_HOTPLUG_CPU 3351142d810STejun Heo case CPU_UP_CANCELED: 33654e88fadSAmit K. Arora case CPU_POST_DEAD: 3379c6f7e43SIngo Molnar { 3389c6f7e43SIngo Molnar struct cpu_stop_work *work; 3399c6f7e43SIngo Molnar 34034f971f6SPeter Zijlstra sched_set_stop_task(cpu, NULL); 3411142d810STejun Heo /* kill the stopper */ 342*860a0ffaSThomas Gleixner kthread_stop(p); 3431142d810STejun Heo /* drain remaining works */ 3441142d810STejun Heo spin_lock_irq(&stopper->lock); 3451142d810STejun Heo list_for_each_entry(work, &stopper->works, list) 3461142d810STejun Heo cpu_stop_signal_done(work->done, false); 3471142d810STejun Heo stopper->enabled = false; 3481142d810STejun Heo spin_unlock_irq(&stopper->lock); 3491142d810STejun Heo /* release the stopper */ 350*860a0ffaSThomas Gleixner put_task_struct(p); 351*860a0ffaSThomas Gleixner per_cpu(cpu_stopper_task, cpu) = NULL; 3521142d810STejun Heo break; 3539c6f7e43SIngo Molnar } 3541142d810STejun Heo #endif 3551142d810STejun Heo } 3561142d810STejun Heo 3571142d810STejun Heo return NOTIFY_OK; 3581142d810STejun Heo } 3591142d810STejun Heo 3601142d810STejun Heo /* 3611142d810STejun Heo * Give it a higher priority so that cpu stopper is available to other 3621142d810STejun Heo * cpu notifiers. It currently shares the same priority as sched 3631142d810STejun Heo * migration_notifier. 3641142d810STejun Heo */ 3651142d810STejun Heo static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = { 3661142d810STejun Heo .notifier_call = cpu_stop_cpu_callback, 3671142d810STejun Heo .priority = 10, 3681142d810STejun Heo }; 3691142d810STejun Heo 3701142d810STejun Heo static int __init cpu_stop_init(void) 3711142d810STejun Heo { 3721142d810STejun Heo void *bcpu = (void *)(long)smp_processor_id(); 3731142d810STejun Heo unsigned int cpu; 3741142d810STejun Heo int err; 3751142d810STejun Heo 3761142d810STejun Heo for_each_possible_cpu(cpu) { 3771142d810STejun Heo struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); 3781142d810STejun Heo 3791142d810STejun Heo spin_lock_init(&stopper->lock); 3801142d810STejun Heo INIT_LIST_HEAD(&stopper->works); 3811142d810STejun Heo } 3821142d810STejun Heo 3831142d810STejun Heo /* start one for the boot cpu */ 3841142d810STejun Heo err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE, 3851142d810STejun Heo bcpu); 3864ce6494dSAkinobu Mita BUG_ON(err != NOTIFY_OK); 3871142d810STejun Heo cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu); 3881142d810STejun Heo register_cpu_notifier(&cpu_stop_cpu_notifier); 3891142d810STejun Heo 390f445027eSJeremy Fitzhardinge stop_machine_initialized = true; 391f445027eSJeremy Fitzhardinge 3921142d810STejun Heo return 0; 3931142d810STejun Heo } 3941142d810STejun Heo early_initcall(cpu_stop_init); 3951da177e4SLinus Torvalds 396bbf1bb3eSTejun Heo #ifdef CONFIG_STOP_MACHINE 397bbf1bb3eSTejun Heo 398ffdb5976SRusty Russell /* This controls the threads on each CPU. */ 3991da177e4SLinus Torvalds enum stopmachine_state { 400ffdb5976SRusty Russell /* Dummy starting state for thread. */ 401ffdb5976SRusty Russell STOPMACHINE_NONE, 402ffdb5976SRusty Russell /* Awaiting everyone to be scheduled. */ 4031da177e4SLinus Torvalds STOPMACHINE_PREPARE, 404ffdb5976SRusty Russell /* Disable interrupts. */ 4051da177e4SLinus Torvalds STOPMACHINE_DISABLE_IRQ, 406ffdb5976SRusty Russell /* Run the function */ 4075c2aed62SJason Baron STOPMACHINE_RUN, 408ffdb5976SRusty Russell /* Exit */ 4091da177e4SLinus Torvalds STOPMACHINE_EXIT, 4101da177e4SLinus Torvalds }; 4111da177e4SLinus Torvalds 4125c2aed62SJason Baron struct stop_machine_data { 4135c2aed62SJason Baron int (*fn)(void *); 4145c2aed62SJason Baron void *data; 4153fc1f1e2STejun Heo /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ 4163fc1f1e2STejun Heo unsigned int num_threads; 4173fc1f1e2STejun Heo const struct cpumask *active_cpus; 4183fc1f1e2STejun Heo 4193fc1f1e2STejun Heo enum stopmachine_state state; 4203fc1f1e2STejun Heo atomic_t thread_ack; 421ffdb5976SRusty Russell }; 4225c2aed62SJason Baron 4233fc1f1e2STejun Heo static void set_state(struct stop_machine_data *smdata, 4243fc1f1e2STejun Heo enum stopmachine_state newstate) 4251da177e4SLinus Torvalds { 426ffdb5976SRusty Russell /* Reset ack counter. */ 4273fc1f1e2STejun Heo atomic_set(&smdata->thread_ack, smdata->num_threads); 428ffdb5976SRusty Russell smp_wmb(); 4293fc1f1e2STejun Heo smdata->state = newstate; 430ffdb5976SRusty Russell } 4311da177e4SLinus Torvalds 432ffdb5976SRusty Russell /* Last one to ack a state moves to the next state. */ 4333fc1f1e2STejun Heo static void ack_state(struct stop_machine_data *smdata) 434ffdb5976SRusty Russell { 4353fc1f1e2STejun Heo if (atomic_dec_and_test(&smdata->thread_ack)) 4363fc1f1e2STejun Heo set_state(smdata, smdata->state + 1); 437ffdb5976SRusty Russell } 438d8cb7c1dSAndrew Morton 4393fc1f1e2STejun Heo /* This is the cpu_stop function which stops the CPU. */ 4403fc1f1e2STejun Heo static int stop_machine_cpu_stop(void *data) 441ffdb5976SRusty Russell { 4423fc1f1e2STejun Heo struct stop_machine_data *smdata = data; 443ffdb5976SRusty Russell enum stopmachine_state curstate = STOPMACHINE_NONE; 4443fc1f1e2STejun Heo int cpu = smp_processor_id(), err = 0; 445f740e6cdSTejun Heo unsigned long flags; 4463fc1f1e2STejun Heo bool is_active; 4471da177e4SLinus Torvalds 448f740e6cdSTejun Heo /* 449f740e6cdSTejun Heo * When called from stop_machine_from_inactive_cpu(), irq might 450f740e6cdSTejun Heo * already be disabled. Save the state and restore it on exit. 451f740e6cdSTejun Heo */ 452f740e6cdSTejun Heo local_save_flags(flags); 453f740e6cdSTejun Heo 4543fc1f1e2STejun Heo if (!smdata->active_cpus) 4553fc1f1e2STejun Heo is_active = cpu == cpumask_first(cpu_online_mask); 4563fc1f1e2STejun Heo else 4573fc1f1e2STejun Heo is_active = cpumask_test_cpu(cpu, smdata->active_cpus); 4583fc1f1e2STejun Heo 4591da177e4SLinus Torvalds /* Simple state machine */ 460ffdb5976SRusty Russell do { 461ffdb5976SRusty Russell /* Chill out and ensure we re-read stopmachine_state. */ 462ffdb5976SRusty Russell cpu_relax(); 4633fc1f1e2STejun Heo if (smdata->state != curstate) { 4643fc1f1e2STejun Heo curstate = smdata->state; 465ffdb5976SRusty Russell switch (curstate) { 466ffdb5976SRusty Russell case STOPMACHINE_DISABLE_IRQ: 4671da177e4SLinus Torvalds local_irq_disable(); 468a12bb444SBenjamin Herrenschmidt hard_irq_disable(); 469ffdb5976SRusty Russell break; 470ffdb5976SRusty Russell case STOPMACHINE_RUN: 4713fc1f1e2STejun Heo if (is_active) 4728163bcacSHeiko Carstens err = smdata->fn(smdata->data); 473ffdb5976SRusty Russell break; 474ffdb5976SRusty Russell default: 475ffdb5976SRusty Russell break; 4761da177e4SLinus Torvalds } 4773fc1f1e2STejun Heo ack_state(smdata); 4781da177e4SLinus Torvalds } 479ffdb5976SRusty Russell } while (curstate != STOPMACHINE_EXIT); 4801da177e4SLinus Torvalds 481f740e6cdSTejun Heo local_irq_restore(flags); 4823fc1f1e2STejun Heo return err; 483ffdb5976SRusty Russell } 4841da177e4SLinus Torvalds 48541c7bb95SRusty Russell int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) 4861da177e4SLinus Torvalds { 4873fc1f1e2STejun Heo struct stop_machine_data smdata = { .fn = fn, .data = data, 4883fc1f1e2STejun Heo .num_threads = num_online_cpus(), 4893fc1f1e2STejun Heo .active_cpus = cpus }; 4901da177e4SLinus Torvalds 491f445027eSJeremy Fitzhardinge if (!stop_machine_initialized) { 492f445027eSJeremy Fitzhardinge /* 493f445027eSJeremy Fitzhardinge * Handle the case where stop_machine() is called 494f445027eSJeremy Fitzhardinge * early in boot before stop_machine() has been 495f445027eSJeremy Fitzhardinge * initialized. 496f445027eSJeremy Fitzhardinge */ 497f445027eSJeremy Fitzhardinge unsigned long flags; 498f445027eSJeremy Fitzhardinge int ret; 499f445027eSJeremy Fitzhardinge 500f445027eSJeremy Fitzhardinge WARN_ON_ONCE(smdata.num_threads != 1); 501f445027eSJeremy Fitzhardinge 502f445027eSJeremy Fitzhardinge local_irq_save(flags); 503f445027eSJeremy Fitzhardinge hard_irq_disable(); 504f445027eSJeremy Fitzhardinge ret = (*fn)(data); 505f445027eSJeremy Fitzhardinge local_irq_restore(flags); 506f445027eSJeremy Fitzhardinge 507f445027eSJeremy Fitzhardinge return ret; 508f445027eSJeremy Fitzhardinge } 509f445027eSJeremy Fitzhardinge 5103fc1f1e2STejun Heo /* Set the initial state and stop all online cpus. */ 5113fc1f1e2STejun Heo set_state(&smdata, STOPMACHINE_PREPARE); 5123fc1f1e2STejun Heo return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata); 5131da177e4SLinus Torvalds } 5141da177e4SLinus Torvalds 51541c7bb95SRusty Russell int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) 5161da177e4SLinus Torvalds { 5171da177e4SLinus Torvalds int ret; 5181da177e4SLinus Torvalds 5191da177e4SLinus Torvalds /* No CPUs can come up or down during this. */ 52086ef5c9aSGautham R Shenoy get_online_cpus(); 521eeec4fadSRusty Russell ret = __stop_machine(fn, data, cpus); 52286ef5c9aSGautham R Shenoy put_online_cpus(); 5231da177e4SLinus Torvalds return ret; 5241da177e4SLinus Torvalds } 525eeec4fadSRusty Russell EXPORT_SYMBOL_GPL(stop_machine); 526bbf1bb3eSTejun Heo 527f740e6cdSTejun Heo /** 528f740e6cdSTejun Heo * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU 529f740e6cdSTejun Heo * @fn: the function to run 530f740e6cdSTejun Heo * @data: the data ptr for the @fn() 531f740e6cdSTejun Heo * @cpus: the cpus to run the @fn() on (NULL = any online cpu) 532f740e6cdSTejun Heo * 533f740e6cdSTejun Heo * This is identical to stop_machine() but can be called from a CPU which 534f740e6cdSTejun Heo * is not active. The local CPU is in the process of hotplug (so no other 535f740e6cdSTejun Heo * CPU hotplug can start) and not marked active and doesn't have enough 536f740e6cdSTejun Heo * context to sleep. 537f740e6cdSTejun Heo * 538f740e6cdSTejun Heo * This function provides stop_machine() functionality for such state by 539f740e6cdSTejun Heo * using busy-wait for synchronization and executing @fn directly for local 540f740e6cdSTejun Heo * CPU. 541f740e6cdSTejun Heo * 542f740e6cdSTejun Heo * CONTEXT: 543f740e6cdSTejun Heo * Local CPU is inactive. Temporarily stops all active CPUs. 544f740e6cdSTejun Heo * 545f740e6cdSTejun Heo * RETURNS: 546f740e6cdSTejun Heo * 0 if all executions of @fn returned 0, any non zero return value if any 547f740e6cdSTejun Heo * returned non zero. 548f740e6cdSTejun Heo */ 549f740e6cdSTejun Heo int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data, 550f740e6cdSTejun Heo const struct cpumask *cpus) 551f740e6cdSTejun Heo { 552f740e6cdSTejun Heo struct stop_machine_data smdata = { .fn = fn, .data = data, 553f740e6cdSTejun Heo .active_cpus = cpus }; 554f740e6cdSTejun Heo struct cpu_stop_done done; 555f740e6cdSTejun Heo int ret; 556f740e6cdSTejun Heo 557f740e6cdSTejun Heo /* Local CPU must be inactive and CPU hotplug in progress. */ 558f740e6cdSTejun Heo BUG_ON(cpu_active(raw_smp_processor_id())); 559f740e6cdSTejun Heo smdata.num_threads = num_active_cpus() + 1; /* +1 for local */ 560f740e6cdSTejun Heo 561f740e6cdSTejun Heo /* No proper task established and can't sleep - busy wait for lock. */ 562f740e6cdSTejun Heo while (!mutex_trylock(&stop_cpus_mutex)) 563f740e6cdSTejun Heo cpu_relax(); 564f740e6cdSTejun Heo 565f740e6cdSTejun Heo /* Schedule work on other CPUs and execute directly for local CPU */ 566f740e6cdSTejun Heo set_state(&smdata, STOPMACHINE_PREPARE); 567f740e6cdSTejun Heo cpu_stop_init_done(&done, num_active_cpus()); 568f740e6cdSTejun Heo queue_stop_cpus_work(cpu_active_mask, stop_machine_cpu_stop, &smdata, 569f740e6cdSTejun Heo &done); 570f740e6cdSTejun Heo ret = stop_machine_cpu_stop(&smdata); 571f740e6cdSTejun Heo 572f740e6cdSTejun Heo /* Busy wait for completion. */ 573f740e6cdSTejun Heo while (!completion_done(&done.completion)) 574f740e6cdSTejun Heo cpu_relax(); 575f740e6cdSTejun Heo 576f740e6cdSTejun Heo mutex_unlock(&stop_cpus_mutex); 577f740e6cdSTejun Heo return ret ?: done.ret; 578f740e6cdSTejun Heo } 579f740e6cdSTejun Heo 580bbf1bb3eSTejun Heo #endif /* CONFIG_STOP_MACHINE */ 581