xref: /linux/kernel/stop_machine.c (revision 9c6f7e43b4e02c161b53e97ba913855246876c61)
/*
 * kernel/stop_machine.c
 *
 * Copyright (C) 2008, 2005	IBM Corporation.
 * Copyright (C) 2008, 2005	Rusty Russell rusty@rustcorp.com.au
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2 and any later version.
 */
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>

#include <asm/atomic.h>

/*
 * Structure to determine completion condition and record errors.  May
 * be shared by works on different cpus.
 */
struct cpu_stop_done {
	atomic_t		nr_todo;	/* nr left to execute */
	bool			executed;	/* actually executed? */
	int			ret;		/* collected return value */
	struct completion	completion;	/* fired if nr_todo reaches 0 */
};

/* the actual stopper, one per possible cpu, enabled on online cpus */
struct cpu_stopper {
	spinlock_t		lock;
	struct list_head	works;		/* list of pending works */
	struct task_struct	*thread;	/* stopper thread */
	bool			enabled;	/* is this stopper enabled? */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);

static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
	memset(done, 0, sizeof(*done));
	atomic_set(&done->nr_todo, nr_todo);
	init_completion(&done->completion);
}

/* signal completion unless @done is NULL */
static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
{
	if (done) {
		if (executed)
			done->executed = true;
		if (atomic_dec_and_test(&done->nr_todo))
			complete(&done->completion);
	}
}

/* queue @work to @stopper.  if offline, @work is completed immediately */
static void cpu_stop_queue_work(struct cpu_stopper *stopper,
				struct cpu_stop_work *work)
{
	unsigned long flags;

	spin_lock_irqsave(&stopper->lock, flags);

	if (stopper->enabled) {
		list_add_tail(&work->list, &stopper->works);
		wake_up_process(stopper->thread);
	} else
		cpu_stop_signal_done(work->done, false);

	spin_unlock_irqrestore(&stopper->lock, flags);
}

/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
 * the highest priority preempting any task on the cpu and
 * monopolizing it.  This function returns after the execution is
 * complete.
 *
 * This function doesn't guarantee @cpu stays online till @fn
 * completes.  If @cpu goes down in the middle, execution may happen
 * partially or fully on different cpus.  @fn should either be ready
 * for that or the caller should ensure that @cpu stays online until
 * this function completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;
	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

	cpu_stop_init_done(&done, 1);
	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work);
	wait_for_completion(&done.completion);
	return done.executed ? done.ret : -ENOENT;
}
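
/*
 * Example (editor's illustrative sketch, not part of the original file):
 * run a short, non-sleeping callback with @cpu monopolized.  The callback
 * and the helper it calls are hypothetical.
 *
 *	static int drain_local_queue(void *arg)
 *	{
 *		struct my_queue *q = arg;	// hypothetical type
 *
 *		// Runs at the highest priority on the target cpu with
 *		// every other task preempted; must not sleep.
 *		return do_drain(q);		// hypothetical helper
 *	}
 *
 *	err = stop_one_cpu(cpu, drain_local_queue, q);
 *	// -ENOENT if @cpu was offline, otherwise drain_local_queue()'s
 *	// return value.
 */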

/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 */
void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
			struct cpu_stop_work *work_buf)
{
	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
}
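
/*
 * Example (editor's illustrative sketch): because there is no wait, the
 * work buffer must stay valid until the stopper thread has consumed it;
 * static or per-cpu storage is the usual choice.  Names are hypothetical.
 *
 *	static DEFINE_PER_CPU(struct cpu_stop_work, kick_work);
 *
 *	void kick_cpu(unsigned int cpu)
 *	{
 *		stop_one_cpu_nowait(cpu, kick_fn, NULL,
 *				    &per_cpu(kick_work, cpu));
 *	}
 */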

/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);

int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_work *work;
	struct cpu_stop_done done;
	unsigned int cpu;

	/* initialize works and done */
	for_each_cpu(cpu, cpumask) {
		work = &per_cpu(stop_cpus_work, cpu);
		work->fn = fn;
		work->arg = arg;
		work->done = &done;
	}
	cpu_stop_init_done(&done, cpumask_weight(cpumask));

	/*
	 * Disable preemption while queueing to avoid getting
	 * preempted by a stopper which might wait for other stoppers
	 * to enter @fn which can lead to deadlock.
	 */
	preempt_disable();
	for_each_cpu(cpu, cpumask)
		cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
				    &per_cpu(stop_cpus_work, cpu));
	preempt_enable();

	wait_for_completion(&done.completion);
	return done.executed ? done.ret : -ENOENT;
}

/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
 * @fn is run in a process context with the highest priority
 * preempting any task on the cpu and monopolizing it.  This function
 * returns after all executions are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes.  If some cpus go down in the middle, execution
 * on the cpu may happen partially or fully on different cpus.  @fn
 * should either be ready for that or the caller should ensure that
 * the cpus stay online until this function completes.
 *
 * All stop_cpus() calls are serialized, making it safe for @fn to wait
 * for all cpus to start executing it.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, or one of the non-zero return values if any execution
 * returned non-zero.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	mutex_lock(&stop_cpus_mutex);
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}
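
/*
 * Example (editor's illustrative sketch): run a callback on every online
 * cpu, each with its cpu monopolized.  Since stop_cpus() calls are
 * serialized, @fn may safely spin waiting for its peers, e.g. on a
 * shared atomic counter.  flush_local_tlb_cb() is hypothetical.
 *
 *	err = stop_cpus(cpu_online_mask, flush_local_tlb_cb, NULL);
 *	if (err == -ENOENT)
 *		;	// no cpu in the mask was online
 */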

/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus, -ENOENT if
 * @fn(@arg) was not executed at all because all cpus in @cpumask were
 * offline; otherwise, 0 if all executions of @fn returned 0, or one
 * of the non-zero return values if any execution returned non-zero.
 */
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	if (!mutex_trylock(&stop_cpus_mutex))
		return -EAGAIN;
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}
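
/*
 * Example (editor's illustrative sketch): a caller that must not block
 * behind another stop_cpus() user can retry instead.  Hypothetical usage:
 *
 *	for (;;) {
 *		err = try_stop_cpus(cpu_online_mask, fn, arg);
 *		if (err != -EAGAIN)
 *			break;
 *		cond_resched();	// back off; still process context
 *	}
 */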

static int cpu_stopper_thread(void *data)
{
	struct cpu_stopper *stopper = data;
	struct cpu_stop_work *work;
	int ret;

repeat:
	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */

	if (kthread_should_stop()) {
		__set_current_state(TASK_RUNNING);
		return 0;
	}

	work = NULL;
	spin_lock_irq(&stopper->lock);
	if (!list_empty(&stopper->works)) {
		work = list_first_entry(&stopper->works,
					struct cpu_stop_work, list);
		list_del_init(&work->list);
	}
	spin_unlock_irq(&stopper->lock);

	if (work) {
		cpu_stop_fn_t fn = work->fn;
		void *arg = work->arg;
		struct cpu_stop_done *done = work->done;
		char ksym_buf[KSYM_NAME_LEN];

		__set_current_state(TASK_RUNNING);

		/* cpu stop callbacks are not allowed to sleep */
		preempt_disable();

		ret = fn(arg);
		if (ret)
			done->ret = ret;

		/* restore preemption and check it's still balanced */
		preempt_enable();
		WARN_ONCE(preempt_count(),
			  "cpu_stop: %s(%p) leaked preempt count\n",
			  kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
					  ksym_buf), arg);

		cpu_stop_signal_done(done, true);
	} else
		schedule();

	goto repeat;
}

/* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */
static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
					   unsigned long action, void *hcpu)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
	unsigned int cpu = (unsigned long)hcpu;
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	struct task_struct *p;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		BUG_ON(stopper->thread || stopper->enabled ||
		       !list_empty(&stopper->works));
		p = kthread_create(cpu_stopper_thread, stopper, "migration/%d",
				   cpu);
		if (IS_ERR(p))
			return NOTIFY_BAD;
		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
		get_task_struct(p);
		stopper->thread = p;
		break;

	case CPU_ONLINE:
		kthread_bind(stopper->thread, cpu);
		/* strictly unnecessary, as first user will wake it */
		wake_up_process(stopper->thread);
		/* mark enabled */
		spin_lock_irq(&stopper->lock);
		stopper->enabled = true;
		spin_unlock_irq(&stopper->lock);
		break;

#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
	case CPU_DEAD:
	{
		struct cpu_stop_work *work;

		/* kill the stopper */
		kthread_stop(stopper->thread);
		/* drain remaining works */
		spin_lock_irq(&stopper->lock);
		list_for_each_entry(work, &stopper->works, list)
			cpu_stop_signal_done(work->done, false);
		stopper->enabled = false;
		spin_unlock_irq(&stopper->lock);
		/* release the stopper */
		put_task_struct(stopper->thread);
		stopper->thread = NULL;
		break;
	}
#endif
	}

	return NOTIFY_OK;
}

/*
 * Give it a higher priority so that cpu stopper is available to other
 * cpu notifiers.  It currently shares the same priority as sched
 * migration_notifier.
 */
static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = {
	.notifier_call	= cpu_stop_cpu_callback,
	.priority	= 10,
};

static int __init cpu_stop_init(void)
{
	void *bcpu = (void *)(long)smp_processor_id();
	unsigned int cpu;
	int err;

	for_each_possible_cpu(cpu) {
		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

		spin_lock_init(&stopper->lock);
		INIT_LIST_HEAD(&stopper->works);
	}

	/* start one for the boot cpu */
	err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE,
				    bcpu);
	BUG_ON(err == NOTIFY_BAD);
	cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu);
	register_cpu_notifier(&cpu_stop_cpu_notifier);

	return 0;
}
early_initcall(cpu_stop_init);

#ifdef CONFIG_STOP_MACHINE

/* This controls the threads on each CPU. */
enum stopmachine_state {
	/* Dummy starting state for thread. */
	STOPMACHINE_NONE,
	/* Awaiting everyone to be scheduled. */
	STOPMACHINE_PREPARE,
	/* Disable interrupts. */
	STOPMACHINE_DISABLE_IRQ,
	/* Run the function */
	STOPMACHINE_RUN,
	/* Exit */
	STOPMACHINE_EXIT,
};

struct stop_machine_data {
	int			(*fn)(void *);
	void			*data;
	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
	unsigned int		num_threads;
	const struct cpumask	*active_cpus;

	enum stopmachine_state	state;
	atomic_t		thread_ack;
};

static void set_state(struct stop_machine_data *smdata,
		      enum stopmachine_state newstate)
{
	/* Reset ack counter. */
	atomic_set(&smdata->thread_ack, smdata->num_threads);
	smp_wmb();
	smdata->state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(struct stop_machine_data *smdata)
{
	if (atomic_dec_and_test(&smdata->thread_ack))
		set_state(smdata, smdata->state + 1);
}
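
/*
 * Editor's note (illustrative): with num_threads == 3 the machine steps
 *
 *	PREPARE -> DISABLE_IRQ -> RUN -> EXIT
 *
 * and each arrow is crossed only after all three threads have called
 * ack_state().  set_state() refills thread_ack before publishing the new
 * state; the smp_wmb() orders the refill before the state write so a
 * thread observing the new state also observes a full ack counter.
 */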

/* This is the cpu_stop function which stops the CPU. */
static int stop_machine_cpu_stop(void *data)
{
	struct stop_machine_data *smdata = data;
	enum stopmachine_state curstate = STOPMACHINE_NONE;
	int cpu = smp_processor_id(), err = 0;
	bool is_active;

	if (!smdata->active_cpus)
		is_active = cpu == cpumask_first(cpu_online_mask);
	else
		is_active = cpumask_test_cpu(cpu, smdata->active_cpus);

	/* Simple state machine */
	do {
		/* Chill out and ensure we re-read stopmachine_state. */
		cpu_relax();
		if (smdata->state != curstate) {
			curstate = smdata->state;
			switch (curstate) {
			case STOPMACHINE_DISABLE_IRQ:
				local_irq_disable();
				hard_irq_disable();
				break;
			case STOPMACHINE_RUN:
				if (is_active)
					err = smdata->fn(smdata->data);
				break;
			default:
				break;
			}
			ack_state(smdata);
		}
	} while (curstate != STOPMACHINE_EXIT);

	local_irq_enable();
	return err;
}

int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	struct stop_machine_data smdata = { .fn = fn, .data = data,
					    .num_threads = num_online_cpus(),
					    .active_cpus = cpus };

	/* Set the initial state and stop all online cpus. */
	set_state(&smdata, STOPMACHINE_PREPARE);
	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
}

int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	int ret;

	/* No CPUs can come up or down during this. */
	get_online_cpus();
	ret = __stop_machine(fn, data, cpus);
	put_online_cpus();
	return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
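
/*
 * Example (editor's illustrative sketch): the classic use is switching
 * global state that no cpu may observe mid-change, e.g. live-patching
 * text.  apply_patch_cb() and patch_text() are hypothetical; passing
 * NULL for @cpus runs the callback on the first online cpu while the
 * rest spin with interrupts disabled.
 *
 *	static int apply_patch_cb(void *addr)
 *	{
 *		patch_text(addr);	// no other cpu is executing here
 *		return 0;
 *	}
 *
 *	err = stop_machine(apply_patch_cb, addr, NULL);
 */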

#endif	/* CONFIG_STOP_MACHINE */