/*
 * kernel/stop_machine.c
 *
 * Copyright (C) 2008, 2005	IBM Corporation.
 * Copyright (C) 2008, 2005	Rusty Russell rusty@rustcorp.com.au
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2 and any later version.
 */
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>

#include <linux/atomic.h>

/*
 * Structure to determine completion condition and record errors.  May
 * be shared by works on different cpus.
 */
struct cpu_stop_done {
	atomic_t		nr_todo;	/* nr left to execute */
	bool			executed;	/* actually executed? */
	int			ret;		/* collected return value */
	struct completion	completion;	/* fired if nr_todo reaches 0 */
};

/* the actual stopper, one per every possible cpu, enabled on online cpus */
struct cpu_stopper {
	spinlock_t		lock;
	bool			enabled;	/* is this stopper enabled? */
	struct list_head	works;		/* list of pending works */
	struct task_struct	*thread;	/* stopper thread */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);

/* initialize @done so that @nr_todo works must signal before completion */
static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
	memset(done, 0, sizeof(*done));
	atomic_set(&done->nr_todo, nr_todo);
	init_completion(&done->completion);
}

/* signal completion unless @done is NULL */
static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
{
	if (done) {
		if (executed)
			done->executed = true;
		/* last work to finish fires the completion */
		if (atomic_dec_and_test(&done->nr_todo))
			complete(&done->completion);
	}
}

/* queue @work to @stopper.  if offline, @work is completed immediately */
static void cpu_stop_queue_work(struct cpu_stopper *stopper,
				struct cpu_stop_work *work)
{
	unsigned long flags;

	spin_lock_irqsave(&stopper->lock, flags);

	if (stopper->enabled) {
		list_add_tail(&work->list, &stopper->works);
		wake_up_process(stopper->thread);
	} else
		cpu_stop_signal_done(work->done, false);

	spin_unlock_irqrestore(&stopper->lock, flags);
}

/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
 * the highest priority preempting any task on the cpu and
 * monopolizing it.  This function returns after the execution is
 * complete.
 *
 * This function doesn't guarantee @cpu stays online till @fn
 * completes.  If @cpu goes down in the middle, execution may happen
 * partially or fully on different cpus.  @fn should either be ready
 * for that or the caller should ensure that @cpu stays online until
 * this function completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;
	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

	cpu_stop_init_done(&done, 1);
	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work);
	wait_for_completion(&done.completion);
	return done.executed ? done.ret : -ENOENT;
}

/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 */
void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
			 struct cpu_stop_work *work_buf)
{
	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
}

/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);

static void queue_stop_cpus_work(const struct cpumask *cpumask,
				 cpu_stop_fn_t fn, void *arg,
				 struct cpu_stop_done *done)
{
	struct cpu_stop_work *work;
	unsigned int cpu;

	/* initialize works and done */
	for_each_cpu(cpu, cpumask) {
		work = &per_cpu(stop_cpus_work, cpu);
		work->fn = fn;
		work->arg = arg;
		work->done = done;
	}

	/*
	 * Disable preemption while queueing to avoid getting
	 * preempted by a stopper which might wait for other stoppers
	 * to enter @fn which can lead to deadlock.
	 */
	preempt_disable();
	for_each_cpu(cpu, cpumask)
		cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
				    &per_cpu(stop_cpus_work, cpu));
	preempt_enable();
}

static int __stop_cpus(const struct cpumask *cpumask,
		       cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;

	cpu_stop_init_done(&done, cpumask_weight(cpumask));
	queue_stop_cpus_work(cpumask, fn, arg, &done);
	wait_for_completion(&done.completion);
	return done.executed ? done.ret : -ENOENT;
}

/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
 * @fn is run in a process context with the highest priority
 * preempting any task on the cpu and monopolizing it.  This function
 * returns after all executions are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes.  If some cpus go down in the middle, execution
 * on the cpu may happen partially or fully on different cpus.  @fn
 * should either be ready for that or the caller should ensure that
 * the cpus stay online until this function completes.
 *
 * All stop_cpus() calls are serialized making it safe for @fn to wait
 * for all cpus to start executing it.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, any non zero return value if any returned non zero.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	mutex_lock(&stop_cpus_mutex);
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}

/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus, -ENOENT if
 * @fn(@arg) was not executed at all because all cpus in @cpumask were
 * offline; otherwise, 0 if all executions of @fn returned 0, any non
 * zero return value if any returned non zero.
 */
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	if (!mutex_trylock(&stop_cpus_mutex))
		return -EAGAIN;
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}

/* per-cpu stopper kthread: dequeue one work at a time and run it */
static int cpu_stopper_thread(void *data)
{
	struct cpu_stopper *stopper = data;
	struct cpu_stop_work *work;
	int ret;

repeat:
	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */

	if (kthread_should_stop()) {
		__set_current_state(TASK_RUNNING);
		return 0;
	}

	work = NULL;
	spin_lock_irq(&stopper->lock);
	if (!list_empty(&stopper->works)) {
		work = list_first_entry(&stopper->works,
					struct cpu_stop_work, list);
		list_del_init(&work->list);
	}
	spin_unlock_irq(&stopper->lock);

	if (work) {
		cpu_stop_fn_t fn = work->fn;
		void *arg = work->arg;
		struct cpu_stop_done *done = work->done;
		char ksym_buf[KSYM_NAME_LEN] __maybe_unused;

		__set_current_state(TASK_RUNNING);

		/* cpu stop callbacks are not allowed to sleep */
		preempt_disable();

		ret = fn(arg);
		if (ret)
			done->ret = ret;

		/* restore preemption and check it's still balanced */
		preempt_enable();
		WARN_ONCE(preempt_count(),
			  "cpu_stop: %s(%p) leaked preempt count\n",
			  kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
					  ksym_buf), arg);

		cpu_stop_signal_done(done, true);
	} else
		schedule();

	goto repeat;
}

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

/* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */
static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
					   unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	struct task_struct *p;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		BUG_ON(stopper->thread || stopper->enabled ||
		       !list_empty(&stopper->works));
		p = kthread_create_on_node(cpu_stopper_thread,
					   stopper,
					   cpu_to_node(cpu),
					   "migration/%d", cpu);
		if (IS_ERR(p))
			return notifier_from_errno(PTR_ERR(p));
		get_task_struct(p);
		kthread_bind(p, cpu);
		sched_set_stop_task(cpu, p);
		stopper->thread = p;
		break;

	case CPU_ONLINE:
		/* strictly unnecessary, as first user will wake it */
		wake_up_process(stopper->thread);
		/* mark enabled */
		spin_lock_irq(&stopper->lock);
		stopper->enabled = true;
		spin_unlock_irq(&stopper->lock);
		break;

#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
	case CPU_POST_DEAD:
	{
		struct cpu_stop_work *work;

		sched_set_stop_task(cpu, NULL);
		/* kill the stopper */
		kthread_stop(stopper->thread);
		/* drain remaining works */
		spin_lock_irq(&stopper->lock);
		list_for_each_entry(work, &stopper->works, list)
			cpu_stop_signal_done(work->done, false);
		stopper->enabled = false;
		spin_unlock_irq(&stopper->lock);
		/* release the stopper */
		put_task_struct(stopper->thread);
		stopper->thread = NULL;
		break;
	}
#endif
	}

	return NOTIFY_OK;
}

/*
 * Give it a higher priority so that cpu stopper is available to other
 * cpu notifiers.  It currently shares the same priority as sched
 * migration_notifier.
 */
static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = {
	.notifier_call	= cpu_stop_cpu_callback,
	.priority	= 10,
};

static int __init cpu_stop_init(void)
{
	void *bcpu = (void *)(long)smp_processor_id();
	unsigned int cpu;
	int err;

	for_each_possible_cpu(cpu) {
		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

		spin_lock_init(&stopper->lock);
		INIT_LIST_HEAD(&stopper->works);
	}

	/* start one for the boot cpu */
	err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE,
				    bcpu);
	BUG_ON(err != NOTIFY_OK);
	cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu);
	register_cpu_notifier(&cpu_stop_cpu_notifier);

	return 0;
}
early_initcall(cpu_stop_init);

#ifdef CONFIG_STOP_MACHINE

/* This controls the threads on each CPU. */
enum stopmachine_state {
	/* Dummy starting state for thread. */
	STOPMACHINE_NONE,
	/* Awaiting everyone to be scheduled. */
	STOPMACHINE_PREPARE,
	/* Disable interrupts. */
	STOPMACHINE_DISABLE_IRQ,
	/* Run the function */
	STOPMACHINE_RUN,
	/* Exit */
	STOPMACHINE_EXIT,
};

struct stop_machine_data {
	int			(*fn)(void *);
	void			*data;
	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
	unsigned int		num_threads;
	const struct cpumask	*active_cpus;

	enum stopmachine_state	state;
	atomic_t		thread_ack;
};

static void set_state(struct stop_machine_data *smdata,
		      enum stopmachine_state newstate)
{
	/* Reset ack counter. */
	atomic_set(&smdata->thread_ack, smdata->num_threads);
	/* make the ack counter visible before the new state is published */
	smp_wmb();
	smdata->state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(struct stop_machine_data *smdata)
{
	if (atomic_dec_and_test(&smdata->thread_ack))
		set_state(smdata, smdata->state + 1);
}

/* This is the cpu_stop function which stops the CPU. */
static int stop_machine_cpu_stop(void *data)
{
	struct stop_machine_data *smdata = data;
	enum stopmachine_state curstate = STOPMACHINE_NONE;
	int cpu = smp_processor_id(), err = 0;
	unsigned long flags;
	bool is_active;

	/*
	 * When called from stop_machine_from_inactive_cpu(), irq might
	 * already be disabled.  Save the state and restore it on exit.
	 */
	local_save_flags(flags);

	if (!smdata->active_cpus)
		is_active = cpu == cpumask_first(cpu_online_mask);
	else
		is_active = cpumask_test_cpu(cpu, smdata->active_cpus);

	/* Simple state machine */
	do {
		/* Chill out and ensure we re-read stopmachine_state. */
		cpu_relax();
		if (smdata->state != curstate) {
			curstate = smdata->state;
			switch (curstate) {
			case STOPMACHINE_DISABLE_IRQ:
				local_irq_disable();
				hard_irq_disable();
				break;
			case STOPMACHINE_RUN:
				/* only designated cpus actually run @fn */
				if (is_active)
					err = smdata->fn(smdata->data);
				break;
			default:
				break;
			}
			ack_state(smdata);
		}
	} while (curstate != STOPMACHINE_EXIT);

	local_irq_restore(flags);
	return err;
}

int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	struct stop_machine_data smdata = { .fn = fn, .data = data,
					    .num_threads = num_online_cpus(),
					    .active_cpus = cpus };

	/* Set the initial state and stop all online cpus. */
	set_state(&smdata, STOPMACHINE_PREPARE);
	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
}

int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	int ret;

	/* No CPUs can come up or down during this. */
	get_online_cpus();
	ret = __stop_machine(fn, data, cpus);
	put_online_cpus();
	return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);

/**
 * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
 * @fn: the function to run
 * @data: the data ptr for the @fn()
 * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
 *
 * This is identical to stop_machine() but can be called from a CPU which
 * is not active.  The local CPU is in the process of hotplug (so no other
 * CPU hotplug can start) and not marked active and doesn't have enough
 * context to sleep.
 *
 * This function provides stop_machine() functionality for such state by
 * using busy-wait for synchronization and executing @fn directly for local
 * CPU.
 *
 * CONTEXT:
 * Local CPU is inactive.  Temporarily stops all active CPUs.
 *
 * RETURNS:
 * 0 if all executions of @fn returned 0, any non zero return value if any
 * returned non zero.
 */
int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
				   const struct cpumask *cpus)
{
	struct stop_machine_data smdata = { .fn = fn, .data = data,
					    .active_cpus = cpus };
	struct cpu_stop_done done;
	int ret;

	/* Local CPU must be inactive and CPU hotplug in progress. */
	BUG_ON(cpu_active(raw_smp_processor_id()));
	smdata.num_threads = num_active_cpus() + 1;	/* +1 for local */

	/* No proper task established and can't sleep - busy wait for lock. */
	while (!mutex_trylock(&stop_cpus_mutex))
		cpu_relax();

	/* Schedule work on other CPUs and execute directly for local CPU */
	set_state(&smdata, STOPMACHINE_PREPARE);
	cpu_stop_init_done(&done, num_active_cpus());
	queue_stop_cpus_work(cpu_active_mask, stop_machine_cpu_stop, &smdata,
			     &done);
	ret = stop_machine_cpu_stop(&smdata);

	/* Busy wait for completion. */
	while (!completion_done(&done.completion))
		cpu_relax();

	mutex_unlock(&stop_cpus_mutex);
	return ret ?: done.ret;
}

#endif	/* CONFIG_STOP_MACHINE */