/*
 * kernel/stop_machine.c
 *
 * Copyright (C) 2008, 2005 IBM Corporation.
 * Copyright (C) 2008, 2005 Rusty Russell rusty@rustcorp.com.au
 * Copyright (C) 2010 SUSE Linux Products GmbH
 * Copyright (C) 2010 Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2 and any later version.
 */
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>

#include <asm/atomic.h>

/*
 * Structure to determine completion condition and record errors. May
 * be shared by works on different cpus.
 */
struct cpu_stop_done {
	atomic_t		nr_todo;	/* nr left to execute */
	bool			executed;	/* actually executed? */
	int			ret;		/* collected return value */
	struct completion	completion;	/* fired if nr_todo reaches 0 */
};

/* the actual stopper, one per every possible cpu, enabled on online cpus */
struct cpu_stopper {
	spinlock_t		lock;
	struct list_head	works;		/* list of pending works */
	struct task_struct	*thread;	/* stopper thread */
	bool			enabled;	/* is this stopper enabled? */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);

static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
	memset(done, 0, sizeof(*done));
	atomic_set(&done->nr_todo, nr_todo);
	init_completion(&done->completion);
}

/* signal completion unless @done is NULL */
static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
{
	if (done) {
		if (executed)
			done->executed = true;
		if (atomic_dec_and_test(&done->nr_todo))
			complete(&done->completion);
	}
}

/* queue @work to @stopper.  if offline, @work is completed immediately */
static void cpu_stop_queue_work(struct cpu_stopper *stopper,
				struct cpu_stop_work *work)
{
	unsigned long flags;

	spin_lock_irqsave(&stopper->lock, flags);

	if (stopper->enabled) {
		list_add_tail(&work->list, &stopper->works);
		wake_up_process(stopper->thread);
	} else
		cpu_stop_signal_done(work->done, false);

	spin_unlock_irqrestore(&stopper->lock, flags);
}

/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu. @fn is run in a process context with
 * the highest priority preempting any task on the cpu and
 * monopolizing it. This function returns after the execution is
 * complete.
 *
 * This function doesn't guarantee @cpu stays online till @fn
 * completes. If @cpu goes down in the middle, execution may happen
 * partially or fully on different cpus. @fn should either be ready
 * for that or the caller should ensure that @cpu stays online until
 * this function completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;
	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

	cpu_stop_init_done(&done, 1);
	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work);
	wait_for_completion(&done.completion);
	return done.executed ? done.ret : -ENOENT;
}
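
/*
 * Illustrative usage sketch, not part of the original file: the callback
 * and its argument below are hypothetical.  The callback runs with
 * preemption disabled on the target cpu, so it must not sleep.
 *
 *	static int my_quiesce_fn(void *arg)
 *	{
 *		... short, non-sleeping work ...
 *		return 0;
 *	}
 *
 *	ret = stop_one_cpu(cpu, my_quiesce_fn, NULL);
 *
 * A return of -ENOENT means @cpu was offline and my_quiesce_fn() never
 * ran; otherwise ret is whatever my_quiesce_fn() returned.
 */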

/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 * @work_buf: pointer to cpu_stop_work structure
 *
 * Similar to stop_one_cpu() but doesn't wait for completion. The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 */
void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
			 struct cpu_stop_work *work_buf)
{
	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
}
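
/*
 * Illustrative usage sketch, not part of the original file: because the
 * caller does not wait for completion, @work_buf must stay valid until
 * the stopper thread has started executing @fn.  Per-cpu storage, as in
 * the hypothetical example below, is one way to guarantee that.
 *
 *	static DEFINE_PER_CPU(struct cpu_stop_work, my_stop_work);
 *
 *	stop_one_cpu_nowait(cpu, my_quiesce_fn, NULL,
 *			    &per_cpu(my_stop_work, cpu));
 */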

/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);

int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_work *work;
	struct cpu_stop_done done;
	unsigned int cpu;

	/* initialize works and done */
	for_each_cpu(cpu, cpumask) {
		work = &per_cpu(stop_cpus_work, cpu);
		work->fn = fn;
		work->arg = arg;
		work->done = &done;
	}
	cpu_stop_init_done(&done, cpumask_weight(cpumask));

	/*
	 * Disable preemption while queueing to avoid getting
	 * preempted by a stopper which might wait for other stoppers
	 * to enter @fn which can lead to deadlock.
	 */
	preempt_disable();
	for_each_cpu(cpu, cpumask)
		cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
				    &per_cpu(stop_cpus_work, cpu));
	preempt_enable();

	wait_for_completion(&done.completion);
	return done.executed ? done.ret : -ENOENT;
}

/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask. On each target cpu,
 * @fn is run in a process context with the highest priority
 * preempting any task on the cpu and monopolizing it. This function
 * returns after all executions are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes. If some cpus go down in the middle, execution
 * on the cpu may happen partially or fully on different cpus. @fn
 * should either be ready for that or the caller should ensure that
 * the cpus stay online until this function completes.
 *
 * All stop_cpus() calls are serialized making it safe for @fn to wait
 * for all cpus to start executing it.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, any non zero return value if any returned non zero.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	mutex_lock(&stop_cpus_mutex);
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}

/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus, -ENOENT if
 * @fn(@arg) was not executed at all because all cpus in @cpumask were
 * offline; otherwise, 0 if all executions of @fn returned 0, any non
 * zero return value if any returned non zero.
 */
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	if (!mutex_trylock(&stop_cpus_mutex))
		return -EAGAIN;
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}
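
/*
 * Illustrative usage sketch, not part of the original file: my_sync_fn
 * is hypothetical.  Try the non-blocking variant first and fall back to
 * the serialized one if another caller currently owns the facility.
 *
 *	ret = try_stop_cpus(cpu_online_mask, my_sync_fn, NULL);
 *	if (ret == -EAGAIN)
 *		ret = stop_cpus(cpu_online_mask, my_sync_fn, NULL);
 *
 * Since stop_cpus() calls are serialized, my_sync_fn() may safely spin
 * waiting for its counterparts on the other cpus to start.
 */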

static int cpu_stopper_thread(void *data)
{
	struct cpu_stopper *stopper = data;
	struct cpu_stop_work *work;
	int ret;

repeat:
	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */

	if (kthread_should_stop()) {
		__set_current_state(TASK_RUNNING);
		return 0;
	}

	work = NULL;
	spin_lock_irq(&stopper->lock);
	if (!list_empty(&stopper->works)) {
		work = list_first_entry(&stopper->works,
					struct cpu_stop_work, list);
		list_del_init(&work->list);
	}
	spin_unlock_irq(&stopper->lock);

	if (work) {
		cpu_stop_fn_t fn = work->fn;
		void *arg = work->arg;
		struct cpu_stop_done *done = work->done;
		char ksym_buf[KSYM_NAME_LEN];

		__set_current_state(TASK_RUNNING);

		/* cpu stop callbacks are not allowed to sleep */
		preempt_disable();

		ret = fn(arg);
		if (ret)
			done->ret = ret;

		/* restore preemption and check it's still balanced */
		preempt_enable();
		WARN_ONCE(preempt_count(),
			  "cpu_stop: %s(%p) leaked preempt count\n",
			  kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
					  ksym_buf), arg);

		cpu_stop_signal_done(done, true);
	} else
		schedule();

	goto repeat;
}

/* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */
static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
					   unsigned long action, void *hcpu)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
	unsigned int cpu = (unsigned long)hcpu;
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	struct cpu_stop_work *work;
	struct task_struct *p;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		BUG_ON(stopper->thread || stopper->enabled ||
		       !list_empty(&stopper->works));
		p = kthread_create(cpu_stopper_thread, stopper, "stopper/%d",
				   cpu);
		if (IS_ERR(p))
			return NOTIFY_BAD;
		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
		get_task_struct(p);
		stopper->thread = p;
		break;

	case CPU_ONLINE:
		kthread_bind(stopper->thread, cpu);
		/* strictly unnecessary, as first user will wake it */
		wake_up_process(stopper->thread);
		/* mark enabled */
		spin_lock_irq(&stopper->lock);
		stopper->enabled = true;
		spin_unlock_irq(&stopper->lock);
		break;

#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
	case CPU_DEAD:
		/* kill the stopper */
		kthread_stop(stopper->thread);
		/* drain remaining works */
		spin_lock_irq(&stopper->lock);
		list_for_each_entry(work, &stopper->works, list)
			cpu_stop_signal_done(work->done, false);
		stopper->enabled = false;
		spin_unlock_irq(&stopper->lock);
		/* release the stopper */
		put_task_struct(stopper->thread);
		stopper->thread = NULL;
		break;
#endif
	}

	return NOTIFY_OK;
}

/*
 * Give it a higher priority so that cpu stopper is available to other
 * cpu notifiers. It currently shares the same priority as sched
 * migration_notifier.
 */
static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = {
	.notifier_call	= cpu_stop_cpu_callback,
	.priority	= 10,
};

static int __init cpu_stop_init(void)
{
	void *bcpu = (void *)(long)smp_processor_id();
	unsigned int cpu;
	int err;

	for_each_possible_cpu(cpu) {
		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

		spin_lock_init(&stopper->lock);
		INIT_LIST_HEAD(&stopper->works);
	}

	/* start one for the boot cpu */
	err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE,
				    bcpu);
	BUG_ON(err == NOTIFY_BAD);
	cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu);
	register_cpu_notifier(&cpu_stop_cpu_notifier);

	return 0;
}
early_initcall(cpu_stop_init);

/* This controls the threads on each CPU. */
enum stopmachine_state {
	/* Dummy starting state for thread. */
	STOPMACHINE_NONE,
	/* Awaiting everyone to be scheduled. */
	STOPMACHINE_PREPARE,
	/* Disable interrupts. */
	STOPMACHINE_DISABLE_IRQ,
	/* Run the function */
	STOPMACHINE_RUN,
	/* Exit */
	STOPMACHINE_EXIT,
};
static enum stopmachine_state state;

struct stop_machine_data {
	int (*fn)(void *);
	void *data;
	int fnret;
};

/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
static unsigned int num_threads;
static atomic_t thread_ack;
static DEFINE_MUTEX(lock);
/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */
static DEFINE_MUTEX(setup_lock);
/* Users of stop_machine. */
static int refcount;
static struct workqueue_struct *stop_machine_wq;
static struct stop_machine_data active, idle;
static const struct cpumask *active_cpus;
static void __percpu *stop_machine_work;

static void set_state(enum stopmachine_state newstate)
{
	/* Reset ack counter. */
	atomic_set(&thread_ack, num_threads);
	smp_wmb();
	state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(void)
{
	if (atomic_dec_and_test(&thread_ack))
		set_state(state + 1);
}

/* This is the actual function which stops the CPU. It runs
 * in the context of a dedicated stopmachine workqueue. */
static void stop_cpu(struct work_struct *unused)
{
	enum stopmachine_state curstate = STOPMACHINE_NONE;
	struct stop_machine_data *smdata = &idle;
	int cpu = smp_processor_id();
	int err;

	if (!active_cpus) {
		if (cpu == cpumask_first(cpu_online_mask))
			smdata = &active;
	} else {
		if (cpumask_test_cpu(cpu, active_cpus))
			smdata = &active;
	}
	/* Simple state machine */
	do {
		/* Chill out and ensure we re-read stopmachine_state. */
		cpu_relax();
		if (state != curstate) {
			curstate = state;
			switch (curstate) {
			case STOPMACHINE_DISABLE_IRQ:
				local_irq_disable();
				hard_irq_disable();
				break;
			case STOPMACHINE_RUN:
				/* On multiple CPUs only a single error code
				 * is needed to tell that something failed. */
				err = smdata->fn(smdata->data);
				if (err)
					smdata->fnret = err;
				break;
			default:
				break;
			}
			ack_state();
		}
	} while (curstate != STOPMACHINE_EXIT);

	local_irq_enable();
}
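
/*
 * Illustrative walk-through, not part of the original file: with, say,
 * four online cpus, set_state(STOPMACHINE_PREPARE) resets thread_ack to
 * 4.  Each stop_cpu() worker notices the new state, performs the
 * matching action and calls ack_state(); the fourth (last) ack advances
 * state to STOPMACHINE_DISABLE_IRQ, then to STOPMACHINE_RUN and
 * STOPMACHINE_EXIT in the same lock-step fashion, so no cpu enters a
 * phase before every cpu has finished the previous one.
 */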

/* Callback for CPUs which aren't supposed to do anything. */
static int chill(void *unused)
{
	return 0;
}

int stop_machine_create(void)
{
	mutex_lock(&setup_lock);
	if (refcount)
		goto done;
	stop_machine_wq = create_rt_workqueue("kstop");
	if (!stop_machine_wq)
		goto err_out;
	stop_machine_work = alloc_percpu(struct work_struct);
	if (!stop_machine_work)
		goto err_out;
done:
	refcount++;
	mutex_unlock(&setup_lock);
	return 0;

err_out:
	if (stop_machine_wq)
		destroy_workqueue(stop_machine_wq);
	mutex_unlock(&setup_lock);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(stop_machine_create);

void stop_machine_destroy(void)
{
	mutex_lock(&setup_lock);
	refcount--;
	if (refcount)
		goto done;
	destroy_workqueue(stop_machine_wq);
	free_percpu(stop_machine_work);
done:
	mutex_unlock(&setup_lock);
}
EXPORT_SYMBOL_GPL(stop_machine_destroy);
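
/*
 * Illustrative usage sketch, not part of the original file: a caller
 * that must not allocate memory later (for example because it will be
 * holding locks the workqueue creation could conflict with) can
 * pre-create the stop_machine resources and release them afterwards;
 * the calls are refcounted, so nesting with stop_machine() is fine.
 * my_update_fn and my_data below are hypothetical.
 *
 *	err = stop_machine_create();
 *	if (err)
 *		return err;
 *	...
 *	ret = stop_machine(my_update_fn, my_data, NULL);
 *	...
 *	stop_machine_destroy();
 */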

int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	struct work_struct *sm_work;
	int i, ret;

	/* Set up initial state. */
	mutex_lock(&lock);
	num_threads = num_online_cpus();
	active_cpus = cpus;
	active.fn = fn;
	active.data = data;
	active.fnret = 0;
	idle.fn = chill;
	idle.data = NULL;

	set_state(STOPMACHINE_PREPARE);

	/* Schedule the stop_cpu work on all cpus: hold this CPU so one
	 * doesn't hit this CPU until we're ready. */
	get_cpu();
	for_each_online_cpu(i) {
		sm_work = per_cpu_ptr(stop_machine_work, i);
		INIT_WORK(sm_work, stop_cpu);
		queue_work_on(i, stop_machine_wq, sm_work);
	}
	/* This will release the thread on our CPU. */
	put_cpu();
	flush_workqueue(stop_machine_wq);
	ret = active.fnret;
	mutex_unlock(&lock);
	return ret;
}

int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	int ret;

	ret = stop_machine_create();
	if (ret)
		return ret;
	/* No CPUs can come up or down during this. */
	get_online_cpus();
	ret = __stop_machine(fn, data, cpus);
	put_online_cpus();
	stop_machine_destroy();
	return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
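
/*
 * Illustrative usage sketch, not part of the original file: my_patch_fn
 * is hypothetical.  With a NULL cpumask, stop_machine() runs the
 * callback on the first online cpu while every other online cpu spins
 * with interrupts disabled, which suits rare, system-wide atomic
 * updates such as live code patching.
 *
 *	static int my_patch_fn(void *data)
 *	{
 *		... machine is quiesced, interrupts are off everywhere ...
 *		return 0;
 *	}
 *
 *	ret = stop_machine(my_patch_fn, NULL, NULL);
 */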