/*
 * kernel/stop_machine.c
 *
 * Copyright (C) 2008, 2005	IBM Corporation.
 * Copyright (C) 2008, 2005	Rusty Russell rusty@rustcorp.com.au
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2 and any later version.
 */
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>

#include <linux/atomic.h>

/*
 * Structure to determine completion condition and record errors.  May
 * be shared by works on different cpus.
 */
struct cpu_stop_done {
	atomic_t		nr_todo;	/* nr left to execute */
	bool			executed;	/* actually executed? */
	int			ret;		/* collected return value */
	struct completion	completion;	/* fired if nr_todo reaches 0 */
};

/* the actual stopper, one per possible cpu, enabled on online cpus */
struct cpu_stopper {
	spinlock_t		lock;
	bool			enabled;	/* is this stopper enabled? */
	struct list_head	works;		/* list of pending works */
	struct task_struct	*thread;	/* stopper thread */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);

static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
	memset(done, 0, sizeof(*done));
	atomic_set(&done->nr_todo, nr_todo);
	init_completion(&done->completion);
}

/* signal completion unless @done is NULL */
static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
{
	if (done) {
		if (executed)
			done->executed = true;
		if (atomic_dec_and_test(&done->nr_todo))
			complete(&done->completion);
	}
}

/* queue @work to @stopper.  if offline, @work is completed immediately */
static void cpu_stop_queue_work(struct cpu_stopper *stopper,
				struct cpu_stop_work *work)
{
	unsigned long flags;

	spin_lock_irqsave(&stopper->lock, flags);

	if (stopper->enabled) {
		list_add_tail(&work->list, &stopper->works);
		wake_up_process(stopper->thread);
	} else
		cpu_stop_signal_done(work->done, false);

	spin_unlock_irqrestore(&stopper->lock, flags);
}

/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
 * the highest priority preempting any task on the cpu and
 * monopolizing it.  This function returns after the execution is
 * complete.
 *
 * This function doesn't guarantee @cpu stays online till @fn
 * completes.  If @cpu goes down in the middle, execution may happen
 * partially or fully on different cpus.  @fn should either be ready
 * for that or the caller should ensure that @cpu stays online until
 * this function completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;
	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

	cpu_stop_init_done(&done, 1);
	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work);
	wait_for_completion(&done.completion);
	return done.executed ? done.ret : -ENOENT;
}
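
/*
 * Usage sketch (illustrative only; set_flag() and @flag are made-up
 * names, not part of this file).  The callback runs on the target cpu
 * at the highest priority and must not sleep:
 *
 *	static int set_flag(void *arg)
 *	{
 *		*(int *)arg = 1;
 *		return 0;
 *	}
 *
 *	int ret = stop_one_cpu(3, set_flag, &flag);
 *
 * ret is -ENOENT if cpu 3 was offline, otherwise set_flag()'s return
 * value.
 */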

/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 */
void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
			struct cpu_stop_work *work_buf)
{
	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
}
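
/*
 * Usage sketch (hypothetical; my_stop_work, my_fn and my_arg are
 * made-up names).  Because the caller must keep @work_buf untouched
 * until the stopper consumes it, a static per-cpu buffer is a common
 * pattern:
 *
 *	static DEFINE_PER_CPU(struct cpu_stop_work, my_stop_work);
 *
 *	stop_one_cpu_nowait(cpu, my_fn, my_arg,
 *			    &per_cpu(my_stop_work, cpu));
 *
 * This returns immediately; my_fn() runs later on @cpu if it is
 * online, and there is no way to collect its return value.
 */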

/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);

static void queue_stop_cpus_work(const struct cpumask *cpumask,
				 cpu_stop_fn_t fn, void *arg,
				 struct cpu_stop_done *done)
{
	struct cpu_stop_work *work;
	unsigned int cpu;

	/* initialize works and done */
	for_each_cpu(cpu, cpumask) {
		work = &per_cpu(stop_cpus_work, cpu);
		work->fn = fn;
		work->arg = arg;
		work->done = done;
	}

	/*
	 * Disable preemption while queueing to avoid getting
	 * preempted by a stopper which might wait for other stoppers
	 * to enter @fn which can lead to deadlock.
	 */
	preempt_disable();
	for_each_cpu(cpu, cpumask)
		cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
				    &per_cpu(stop_cpus_work, cpu));
	preempt_enable();
}

static int __stop_cpus(const struct cpumask *cpumask,
		       cpu_stop_fn_t fn, void *arg)
{
	struct cpu_stop_done done;

	cpu_stop_init_done(&done, cpumask_weight(cpumask));
	queue_stop_cpus_work(cpumask, fn, arg, &done);
	wait_for_completion(&done.completion);
	return done.executed ? done.ret : -ENOENT;
}

/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
 * @fn is run in a process context with the highest priority
 * preempting any task on the cpu and monopolizing it.  This function
 * returns after all executions are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes.  If some cpus go down in the middle, execution
 * on those cpus may happen partially or fully on different cpus.  @fn
 * should either be ready for that or the caller should ensure that
 * the cpus stay online until this function completes.
 *
 * All stop_cpus() calls are serialized making it safe for @fn to wait
 * for all cpus to start executing it.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, any non zero return value if any returned non zero.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	mutex_lock(&stop_cpus_mutex);
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}
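
/*
 * Usage sketch (hypothetical callback; count_self() is a made-up
 * name).  Each targeted online cpu runs the callback exactly once:
 *
 *	static int count_self(void *arg)
 *	{
 *		atomic_inc((atomic_t *)arg);
 *		return 0;
 *	}
 *
 *	atomic_t n = ATOMIC_INIT(0);
 *	int ret = stop_cpus(cpu_online_mask, count_self, &n);
 *
 * On success ret is 0 and n equals the number of cpus that ran
 * count_self().  Since stop_cpus() calls are serialized, the callback
 * may even busy-wait for the other cpus to enter it.
 */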

/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus, -ENOENT if
 * @fn(@arg) was not executed at all because all cpus in @cpumask were
 * offline; otherwise, 0 if all executions of @fn returned 0, any non
 * zero return value if any returned non zero.
 */
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	if (!mutex_trylock(&stop_cpus_mutex))
		return -EAGAIN;
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}
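
/*
 * Usage sketch (hypothetical; my_fn, my_arg and restart_later() are
 * made-up names).  try_stop_cpus() still sleeps waiting for the
 * callbacks to finish, but never blocks behind an already in-flight
 * stop_cpus() request:
 *
 *	ret = try_stop_cpus(cpu_online_mask, my_fn, my_arg);
 *	if (ret == -EAGAIN)
 *		return restart_later();
 */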

static int cpu_stopper_thread(void *data)
{
	struct cpu_stopper *stopper = data;
	struct cpu_stop_work *work;
	int ret;

repeat:
	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */

	if (kthread_should_stop()) {
		__set_current_state(TASK_RUNNING);
		return 0;
	}

	work = NULL;
	spin_lock_irq(&stopper->lock);
	if (!list_empty(&stopper->works)) {
		work = list_first_entry(&stopper->works,
					struct cpu_stop_work, list);
		list_del_init(&work->list);
	}
	spin_unlock_irq(&stopper->lock);

	if (work) {
		cpu_stop_fn_t fn = work->fn;
		void *arg = work->arg;
		struct cpu_stop_done *done = work->done;
		char ksym_buf[KSYM_NAME_LEN] __maybe_unused;

		__set_current_state(TASK_RUNNING);

		/* cpu stop callbacks are not allowed to sleep */
		preempt_disable();

		ret = fn(arg);
		if (ret)
			done->ret = ret;

		/* restore preemption and check it's still balanced */
		preempt_enable();
		WARN_ONCE(preempt_count(),
			  "cpu_stop: %s(%p) leaked preempt count\n",
			  kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
					  ksym_buf), arg);

		cpu_stop_signal_done(done, true);
	} else
		schedule();

	goto repeat;
}

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

/* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */
static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
					   unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
	struct task_struct *p;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		BUG_ON(stopper->thread || stopper->enabled ||
		       !list_empty(&stopper->works));
		p = kthread_create_on_node(cpu_stopper_thread,
					   stopper,
					   cpu_to_node(cpu),
					   "migration/%d", cpu);
		if (IS_ERR(p))
			return notifier_from_errno(PTR_ERR(p));
		get_task_struct(p);
		kthread_bind(p, cpu);
		sched_set_stop_task(cpu, p);
		stopper->thread = p;
		break;

	case CPU_ONLINE:
		/* strictly unnecessary, as first user will wake it */
		wake_up_process(stopper->thread);
		/* mark enabled */
		spin_lock_irq(&stopper->lock);
		stopper->enabled = true;
		spin_unlock_irq(&stopper->lock);
		break;

#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
	case CPU_POST_DEAD:
	{
		struct cpu_stop_work *work;

		sched_set_stop_task(cpu, NULL);
		/* kill the stopper */
		kthread_stop(stopper->thread);
		/* drain remaining works */
		spin_lock_irq(&stopper->lock);
		list_for_each_entry(work, &stopper->works, list)
			cpu_stop_signal_done(work->done, false);
		stopper->enabled = false;
		spin_unlock_irq(&stopper->lock);
		/* release the stopper */
		put_task_struct(stopper->thread);
		stopper->thread = NULL;
		break;
	}
#endif
	}

	return NOTIFY_OK;
}

/*
 * Give it a higher priority so that cpu stopper is available to other
 * cpu notifiers.  It currently shares the same priority as sched
 * migration_notifier.
 */
static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = {
	.notifier_call	= cpu_stop_cpu_callback,
	.priority	= 10,
};

static int __init cpu_stop_init(void)
{
	void *bcpu = (void *)(long)smp_processor_id();
	unsigned int cpu;
	int err;

	for_each_possible_cpu(cpu) {
		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

		spin_lock_init(&stopper->lock);
		INIT_LIST_HEAD(&stopper->works);
	}

	/* start one for the boot cpu */
	err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE,
				    bcpu);
	BUG_ON(err != NOTIFY_OK);
	cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu);
	register_cpu_notifier(&cpu_stop_cpu_notifier);

	return 0;
}
early_initcall(cpu_stop_init);

#ifdef CONFIG_STOP_MACHINE

/* This controls the threads on each CPU. */
enum stopmachine_state {
	/* Dummy starting state for thread. */
	STOPMACHINE_NONE,
	/* Awaiting everyone to be scheduled. */
	STOPMACHINE_PREPARE,
	/* Disable interrupts. */
	STOPMACHINE_DISABLE_IRQ,
	/* Run the function */
	STOPMACHINE_RUN,
	/* Exit */
	STOPMACHINE_EXIT,
};

struct stop_machine_data {
	int			(*fn)(void *);
	void			*data;
	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
	unsigned int		num_threads;
	const struct cpumask	*active_cpus;

	enum stopmachine_state	state;
	atomic_t		thread_ack;
};

static void set_state(struct stop_machine_data *smdata,
		      enum stopmachine_state newstate)
{
	/* Reset ack counter. */
	atomic_set(&smdata->thread_ack, smdata->num_threads);
	smp_wmb();
	smdata->state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(struct stop_machine_data *smdata)
{
	if (atomic_dec_and_test(&smdata->thread_ack))
		set_state(smdata, smdata->state + 1);
}
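
/*
 * Illustrative trace of the lockstep protocol above (hypothetical
 * two-cpu timeline, not code):
 *
 *	set_state(smdata, STOPMACHINE_PREPARE)	thread_ack = 2
 *	cpu0 sees PREPARE, ack_state()		thread_ack = 1
 *	cpu1 sees PREPARE, ack_state()		thread_ack = 0
 *		last ack -> set_state(STOPMACHINE_DISABLE_IRQ)
 *	both cpus disable irqs and ack		-> STOPMACHINE_RUN
 *	active cpus run fn(), all ack		-> STOPMACHINE_EXIT
 *
 * No cpu can advance to the next step until every participating cpu
 * has acked the current one via atomic_dec_and_test().
 */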

/* This is the cpu_stop function which stops the CPU. */
static int stop_machine_cpu_stop(void *data)
{
	struct stop_machine_data *smdata = data;
	enum stopmachine_state curstate = STOPMACHINE_NONE;
	int cpu = smp_processor_id(), err = 0;
	unsigned long flags;
	bool is_active;

	/*
	 * When called from stop_machine_from_inactive_cpu(), irq might
	 * already be disabled.  Save the state and restore it on exit.
	 */
	local_save_flags(flags);

	if (!smdata->active_cpus)
		is_active = cpu == cpumask_first(cpu_online_mask);
	else
		is_active = cpumask_test_cpu(cpu, smdata->active_cpus);

	/* Simple state machine */
	do {
		/* Chill out and ensure we re-read stopmachine_state. */
		cpu_relax();
		if (smdata->state != curstate) {
			curstate = smdata->state;
			switch (curstate) {
			case STOPMACHINE_DISABLE_IRQ:
				local_irq_disable();
				hard_irq_disable();
				break;
			case STOPMACHINE_RUN:
				if (is_active)
					err = smdata->fn(smdata->data);
				break;
			default:
				break;
			}
			ack_state(smdata);
		}
	} while (curstate != STOPMACHINE_EXIT);

	local_irq_restore(flags);
	return err;
}

int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	struct stop_machine_data smdata = { .fn = fn, .data = data,
					    .num_threads = num_online_cpus(),
					    .active_cpus = cpus };

	/* Set the initial state and stop all online cpus. */
	set_state(&smdata, STOPMACHINE_PREPARE);
	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
}

int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
	int ret;

	/* No CPUs can come up or down during this. */
	get_online_cpus();
	ret = __stop_machine(fn, data, cpus);
	put_online_cpus();
	return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
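
/*
 * Usage sketch (hypothetical; apply_update() and struct update are
 * made-up names).  stop_machine() suits updates that no other cpu may
 * observe half-done, e.g. cross-modifying code:
 *
 *	struct update { long *word; long new_word; };
 *
 *	static int apply_update(void *arg)
 *	{
 *		struct update *upd = arg;
 *
 *		*upd->word = upd->new_word;
 *		return 0;
 *	}
 *
 *	ret = stop_machine(apply_update, &upd, NULL);
 *
 * With @cpus == NULL, apply_update() runs on the first online cpu
 * while every other online cpu spins with irqs disabled until it
 * returns.
 */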

/**
 * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
 * @fn: the function to run
 * @data: the data ptr for the @fn()
 * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
 *
 * This is identical to stop_machine() but can be called from a CPU which
 * is not active.  The local CPU is in the process of hotplug (so no other
 * CPU hotplug can start) and not marked active and doesn't have enough
 * context to sleep.
 *
 * This function provides stop_machine() functionality for such state by
 * using busy-wait for synchronization and executing @fn directly for local
 * CPU.
 *
 * CONTEXT:
 * Local CPU is inactive.  Temporarily stops all active CPUs.
 *
 * RETURNS:
 * 0 if all executions of @fn returned 0, any non zero return value if any
 * returned non zero.
 */
int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
				  const struct cpumask *cpus)
{
	struct stop_machine_data smdata = { .fn = fn, .data = data,
					    .active_cpus = cpus };
	struct cpu_stop_done done;
	int ret;

	/* Local CPU must be inactive and CPU hotplug in progress. */
	BUG_ON(cpu_active(raw_smp_processor_id()));
	smdata.num_threads = num_active_cpus() + 1;	/* +1 for local */

	/* No proper task established and can't sleep - busy wait for lock. */
	while (!mutex_trylock(&stop_cpus_mutex))
		cpu_relax();

	/* Schedule work on other CPUs and execute directly for local CPU */
	set_state(&smdata, STOPMACHINE_PREPARE);
	cpu_stop_init_done(&done, num_active_cpus());
	queue_stop_cpus_work(cpu_active_mask, stop_machine_cpu_stop, &smdata,
			     &done);
	ret = stop_machine_cpu_stop(&smdata);

	/* Busy wait for completion. */
	while (!completion_done(&done.completion))
		cpu_relax();

	mutex_unlock(&stop_cpus_mutex);
	return ret ?: done.ret;
}
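
/*
 * Call-shape sketch (hypothetical; mtrr_rendezvous_handler() and
 * rendezvous_data stand in for a real callback and its argument).
 * The expected caller is a cpu that is coming up, online but not yet
 * active:
 *
 *	ret = stop_machine_from_inactive_cpu(mtrr_rendezvous_handler,
 *					     &rendezvous_data, NULL);
 *
 * All active cpus spin through stop_machine_cpu_stop() while the
 * local, inactive cpu runs the same state machine directly.
 */
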
#endif	/* CONFIG_STOP_MACHINE */
559